xref: /original-bsd/usr.bin/file/file.c (revision 262b24ac)
1 #ifndef lint
2 static	char sccsid[] = "@(#)file.c	4.15 (Berkeley) 06/01/88";
3 #endif
4 /*
5  * file - determine type of file
6  */
7 
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <a.out.h>
13 
14 extern int	errno;
15 extern int	sys_nerr;
16 extern char	*sys_errlist[];
17 
/*
 * Shared scanning state.  type() fills buf with the first BUFSIZ bytes of
 * the file and stores the byte count returned by read() in `in'; the
 * heuristic routines walk the buffer through the global cursor `i'.
 */
int in;
int i = 0;
char buf[BUFSIZ];

/*
 * Keyword tables handed to lookup().  Each table ends with a 0 entry.
 */
char *troff[] = {	/* new troff intermediate lang */
	"x", "T", "res", "init", "font", "202", "V0", "p1",
	0
};

char *fort[] = {
	"function", "subroutine", "common", "dimension", "block",
	"integer", "real", "data", "double",
	0
};

char *asc[] = {
	"chmk", "mov", "tst", "clr", "jmp",
	0
};

char *c[] = {
	"int", "char", "float", "double", "struct", "extern",
	0
};

char *as[] = {
	"globl", "byte", "align", "text", "data", "comm",
	0
};

char *sh[] = {
	"fi", "elif", "esac", "done", "export",
	"readonly", "trap", "PATH", "HOME",
	0
};

char *csh[] = {
	"alias", "breaksw", "endsw", "foreach", "limit", "onintr",
	"repeat", "setenv", "source", "path", "home",
	0
};

/*
 * Descriptor of the file being examined.  type() sets it to -1 before it
 * opens the file; main() closes it when it is >= 0.
 */
int	ifile;
39 
40 main(argc, argv)
41 char **argv;
42 {
43 	FILE *fl;
44 	register char *p;
45 	char ap[MAXPATHLEN + 1];
46 
47 	if (argc < 2) {
48 		fprintf(stderr, "usage: %s file ...\n", argv[0]);
49 		exit(3);
50 	}
51 
52 	if (argc>1 && argv[1][0]=='-' && argv[1][1]=='f') {
53 		if ((fl = fopen(argv[2], "r")) == NULL) {
54 			perror(argv[2]);
55 			exit(2);
56 		}
57 		while ((p = fgets(ap, sizeof ap, fl)) != NULL) {
58 			int l = strlen(p);
59 			if (l>0)
60 				p[l-1] = '\0';
61 			printf("%s:	", p);
62 			type(p);
63 			if (ifile>=0)
64 				close(ifile);
65 		}
66 		exit(1);
67 	}
68 	while(argc > 1) {
69 		printf("%s:	", argv[1]);
70 		type(argv[1]);
71 		fflush(stdout);
72 		argc--;
73 		argv++;
74 		if (ifile >= 0)
75 			close(ifile);
76 	}
77 	exit(0);
78 }
79 
80 type(file)
81 char *file;
82 {
83 	int j,nl;
84 	char ch;
85 	struct stat mbuf;
86 	char slink[MAXPATHLEN + 1];
87 
88 	ifile = -1;
89 	if (lstat(file, &mbuf) < 0) {
90 		printf("%s\n",
91 		(unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot stat");
92 		return;
93 	}
94 	switch (mbuf.st_mode & S_IFMT) {
95 
96 	case S_IFLNK:
97 		printf("symbolic link");
98 		j = readlink(file, slink, sizeof slink - 1);
99 		if (j >= 0) {
100 			slink[j] = '\0';
101 			printf(" to %s", slink);
102 		}
103 		printf("\n");
104 		return;
105 
106 	case S_IFDIR:
107 		if (mbuf.st_mode & S_ISVTX)
108 			printf("append-only ");
109 		printf("directory\n");
110 		return;
111 
112 	case S_IFCHR:
113 	case S_IFBLK:
114 		printf("%s special (%d/%d)\n",
115 		    (mbuf.st_mode&S_IFMT) == S_IFCHR ? "character" : "block",
116 		     major(mbuf.st_rdev), minor(mbuf.st_rdev));
117 		return;
118 
119 	case S_IFSOCK:
120 		printf("socket\n");
121 		return;
122 	}
123 
124 	ifile = open(file, 0);
125 	if(ifile < 0) {
126 		printf("%s\n",
127 		(unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot read");
128 		return;
129 	}
130 	in = read(ifile, buf, BUFSIZ);
131 	if(in == 0){
132 		printf("empty\n");
133 		return;
134 	}
135 	switch(*(int *)buf) {
136 
137 	case 0413:
138 		printf("demand paged ");
139 
140 	case 0410:
141 		printf("pure ");
142 		goto exec;
143 
144 	case 0411:
145 		printf("jfr or pdp-11 unix 411 executable\n");
146 		return;
147 
148 	case 0407:
149 exec:
150 		if (mbuf.st_mode & S_ISUID)
151 			printf("set-uid ");
152 		if (mbuf.st_mode & S_ISGID)
153 			printf("set-gid ");
154 		if (mbuf.st_mode & S_ISVTX)
155 			printf("sticky ");
156 		printf("executable");
157 		if(((int *)buf)[4] != 0) {
158 			printf(" not stripped");
159 			if(oldo(buf))
160 				printf(" (old format symbol table)");
161 		}
162 		printf("\n");
163 		return;
164 
165 	case 0177555:
166 		printf("very old archive\n");
167 		return;
168 
169 	case 0177545:
170 		printf("old archive\n");
171 		return;
172 
173 	case 070707:
174 		printf("cpio data\n");
175 		return;
176 	}
177 
178 	if (buf[0] == '#' && buf[1] == '!' && shellscript(buf+2, &mbuf))
179 		return;
180 	if (buf[0] == '\037' && buf[1] == '\235') {
181 		if (buf[2]&0x80)
182 			printf("block ");
183 		printf("compressed %d bit code data\n", buf[2]&0x1f);
184 		return;
185 	}
186 	if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) {
187 		printf("archive random library\n");
188 		return;
189 	}
190 	if (strncmp(buf, "!<arch>\n", 8)==0) {
191 		printf("archive\n");
192 		return;
193 	}
194 	if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
195 		lseek(ifile, -512L, 2);	/* last block */
196 		if (read(ifile, buf, BUFSIZ) > 0 && *(short *)buf == 12138) {
197 			printf("PRESS file\n");
198 			return;
199 		}
200 	}
201 	i = 0;
202 	if(ccom() == 0)goto notc;
203 	while(buf[i] == '#'){
204 		j = i;
205 		while(buf[i++] != '\n'){
206 			if(i - j > 255){
207 				printf("data\n");
208 				return;
209 			}
210 			if(i >= in)goto notc;
211 		}
212 		if(ccom() == 0)goto notc;
213 	}
214 check:
215 	if(lookup(c) == 1){
216 		while((ch = buf[i++]) != ';' && ch != '{')if(i >= in)goto notc;
217 		printf("c program text");
218 		goto outa;
219 	}
220 	nl = 0;
221 	while(buf[i] != '('){
222 		if(buf[i] <= 0)
223 			goto notas;
224 		if(buf[i] == ';'){
225 			i++;
226 			goto check;
227 		}
228 		if(buf[i++] == '\n')
229 			if(nl++ > 6)goto notc;
230 		if(i >= in)goto notc;
231 	}
232 	while(buf[i] != ')'){
233 		if(buf[i++] == '\n')
234 			if(nl++ > 6)goto notc;
235 		if(i >= in)goto notc;
236 	}
237 	while(buf[i] != '{'){
238 		if(buf[i++] == '\n')
239 			if(nl++ > 6)goto notc;
240 		if(i >= in)goto notc;
241 	}
242 	printf("c program text");
243 	goto outa;
244 notc:
245 	i = 0;
246 	while(buf[i] == 'c' || buf[i] == '#'){
247 		while(buf[i++] != '\n')if(i >= in)goto notfort;
248 	}
249 	if(lookup(fort) == 1){
250 		printf("fortran program text");
251 		goto outa;
252 	}
253 notfort:
254 	i=0;
255 	if(ascom() == 0)goto notas;
256 	j = i-1;
257 	if(buf[i] == '.'){
258 		i++;
259 		if(lookup(as) == 1){
260 			printf("assembler program text");
261 			goto outa;
262 		}
263 		else if(buf[j] == '\n' && isalpha(buf[j+2])){
264 			printf("roff, nroff, or eqn input text");
265 			goto outa;
266 		}
267 	}
268 	while(lookup(asc) == 0){
269 		if(ascom() == 0)goto notas;
270 		while(buf[i] != '\n' && buf[i++] != ':')
271 			if(i >= in)goto notas;
272 		while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t')if(i++ >= in)goto notas;
273 		j = i-1;
274 		if(buf[i] == '.'){
275 			i++;
276 			if(lookup(as) == 1){
277 				printf("assembler program text");
278 				goto outa;
279 			}
280 			else if(buf[j] == '\n' && isalpha(buf[j+2])){
281 				printf("roff, nroff, or eqn input text");
282 				goto outa;
283 			}
284 		}
285 	}
286 	printf("assembler program text");
287 	goto outa;
288 notas:
289 	for(i=0; i < in; i++)if(buf[i]&0200){
290 		if (buf[0]=='\100' && buf[1]=='\357')
291 			printf("troff (CAT) output\n");
292 		else
293 			printf("data\n");
294 		return;
295 	}
296 	if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6))) {
297 		if (mbuf.st_mode & S_ISUID)
298 			printf("set-uid ");
299 		if (mbuf.st_mode & S_ISGID)
300 			printf("set-gid ");
301 		if (mbuf.st_mode & S_ISVTX)
302 			printf("sticky ");
303 		if (shell(buf, in, sh))
304 			printf("shell script");
305 		else if (shell(buf, in, csh))
306 			printf("c-shell script");
307 		else
308 			printf("commands text");
309 	} else if (troffint(buf, in))
310 		printf("troff intermediate output text");
311 	else if (shell(buf, in, sh))
312 		printf("shell commands");
313 	else if (shell(buf, in, csh))
314 		printf("c-shell commands");
315 	else if (english(buf, in))
316 		printf("English text");
317 	else
318 		printf("ascii text");
319 outa:
320 	while(i < in)
321 		if((buf[i++]&0377) > 127){
322 			printf(" with garbage\n");
323 			return;
324 		}
325 	/* if next few lines in then read whole file looking for nulls ...
326 		while((in = read(ifile,buf,BUFSIZ)) > 0)
327 			for(i = 0; i < in; i++)
328 				if((buf[i]&0377) > 127){
329 					printf(" with garbage\n");
330 					return;
331 				}
332 		/*.... */
333 	printf("\n");
334 }
335 
336 oldo(cp)
337 char *cp;
338 {
339 	struct exec ex;
340 	struct stat stb;
341 
342 	ex = *(struct exec *)cp;
343 	if (fstat(ifile, &stb) < 0)
344 		return(0);
345 	if (N_STROFF(ex)+sizeof(off_t) > stb.st_size)
346 		return (1);
347 	return (0);
348 }
349 
350 
351 
352 troffint(bp, n)
353 char *bp;
354 int n;
355 {
356 	int k;
357 
358 	i = 0;
359 	for (k = 0; k < 6; k++) {
360 		if (lookup(troff) == 0)
361 			return(0);
362 		if (lookup(troff) == 0)
363 			return(0);
364 		while (i < n && buf[i] != '\n')
365 			i++;
366 		if (i++ >= n)
367 			return(0);
368 	}
369 	return(1);
370 }
371 lookup(tab)
372 char *tab[];
373 {
374 	char r;
375 	int k,j,l;
376 	while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n')i++;
377 	for(j=0; tab[j] != 0; j++){
378 		l=0;
379 		for(k=i; ((r=tab[j][l++]) == buf[k] && r != '\0');k++);
380 		if(r == '\0')
381 			if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t'
382 			    || buf[k] == '{' || buf[k] == '/'){
383 				i=k;
384 				return(1);
385 			}
386 	}
387 	return(0);
388 }
389 ccom(){
390 	char cc;
391 	while((cc = buf[i]) == ' ' || cc == '\t' || cc == '\n')if(i++ >= in)return(0);
392 	if(buf[i] == '/' && buf[i+1] == '*'){
393 		i += 2;
394 		while(buf[i] != '*' || buf[i+1] != '/'){
395 			if(buf[i] == '\\')i += 2;
396 			else i++;
397 			if(i >= in)return(0);
398 		}
399 		if((i += 2) >= in)return(0);
400 	}
401 	if(buf[i] == '\n')if(ccom() == 0)return(0);
402 	return(1);
403 }
404 ascom(){
405 	while(buf[i] == '/'){
406 		i++;
407 		while(buf[i++] != '\n')if(i >= in)return(0);
408 		while(buf[i] == '\n')if(i++ >= in)return(0);
409 	}
410 	return(1);
411 }
412 
/*
 * english - decide statistically whether a buffer holds English prose.
 *
 * bp points at n bytes of text.  Characters below 128 are counted with
 * bit 040 forced on, which folds upper case letters onto lower case.
 * The answer is 0 when
 *   - fewer than 50 bytes are available,
 *   - more than a fifth of the punctuation marks  . , ) % ; : ?  are
 *     not followed by a blank or a newline,
 *   - semicolons are more than half as frequent as 'e', or
 *   - '<', '>' and '/' together outnumber 'e'.
 * Otherwise the answer is 1 exactly when vowels make up at least a fifth
 * of the non-blank characters and the letters e, t, a, i, o, n occur at
 * least ten times as often as v, j, k, q, x, z.
 *
 * The parameter and return types are spelled out and the standard
 * `unsigned char' is used in place of the BSD-only u_char so that the
 * routine is accepted by C99 and later compilers.
 */
int
english(bp, n)
char *bp;
int n;
{
# define NASC 128
	int ct[NASC], j, vow, freq, rare;
	int badpun = 0, punct = 0;

	if (n<50)
		return(0);	/* no point in statistics on squibs */
	for(j=0; j<NASC; j++)
		ct[j]=0;
	for(j=0; j<n; j++)
	{
		/* the guard keeps the index non-negative and below NASC */
		if ((unsigned char)bp[j]<NASC)
			ct[bp[j]|040]++;
		switch (bp[j])
		{
		case '.':
		case ',':
		case ')':
		case '%':
		case ';':
		case ':':
		case '?':
			punct++;
			if ( j < n-1 &&
			    bp[j+1] != ' ' &&
			    bp[j+1] != '\n')
				badpun++;
			break;
		default:
			break;
		}
	}
	if (badpun*5 > punct)
		return(0);
	vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u'];
	freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n'];
	rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z'];
	if (2*ct[';'] > ct['e'])
		return(0);
	if ( (ct['>']+ct['<']+ct['/'])>ct['e'])
		return(0);	/* shell file test */
	return (vow*5 >= n-ct[' '] && freq >= 10*rare);
}
451 
452 shellscript(buf, sb)
453 	char buf[];
454 	struct stat *sb;
455 {
456 	register char *tp;
457 	char *cp, *xp, *index();
458 
459 	cp = index(buf, '\n');
460 	if (cp == 0 || cp - buf > in)
461 		return (0);
462 	for (tp = buf; tp != cp && isspace(*tp); tp++)
463 		if (!isascii(*tp))
464 			return (0);
465 	for (xp = tp; tp != cp && !isspace(*tp); tp++)
466 		if (!isascii(*tp))
467 			return (0);
468 	if (tp == xp)
469 		return (0);
470 	if (sb->st_mode & S_ISUID)
471 		printf("set-uid ");
472 	if (sb->st_mode & S_ISGID)
473 		printf("set-gid ");
474 	if (strncmp(xp, "/bin/sh", tp-xp) == 0)
475 		xp = "shell";
476 	else if (strncmp(xp, "/bin/csh", tp-xp) == 0)
477 		xp = "c-shell";
478 	else
479 		*tp = '\0';
480 	printf("executable %s script\n", xp);
481 	return (1);
482 }
483 
484 shell(bp, n, tab)
485 	char *bp;
486 	int n;
487 	char *tab[];
488 {
489 
490 	i = 0;
491 	do {
492 		if (buf[i] == '#' || buf[i] == ':')
493 			while (i < n && buf[i] != '\n')
494 				i++;
495 		if (++i >= n)
496 			break;
497 		if (lookup(tab) == 1)
498 			return (1);
499 	} while (i < n);
500 	return (0);
501 }
502