xref: /original-bsd/usr.bin/file/file.c (revision a9c19d04)
1 #ifndef lint
2 static	char sccsid[] = "@(#)file.c	4.12 (Berkeley) 11/17/85";
3 #endif
4 /*
5  * file - determine type of file
6  */
7 
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <a.out.h>
/* Error state consulted when lstat() or open() fails. */
int	errno;
int	sys_nerr;
char	*sys_errlist[];

/* Scanning state shared by type() and the helper routines. */
int	in;			/* byte count returned by read() into buf */
int	i = 0;			/* current scan offset into buf */
char	buf[BUFSIZ];		/* leading bytes of the file being examined */
int	ifile;			/* descriptor of the file being examined, or -1 */

/*
 * Keyword tables handed to lookup(); each one ends with a null pointer.
 */

/* new troff intermediate lang */
char *troff[] = {
	"x", "T", "res", "init", "font", "202", "V0", "p1",
	0
};

/* words that mark fortran program text */
char *fort[] = {
	"function", "subroutine", "common", "dimension", "block",
	"integer", "real", "data", "double",
	0
};

/* words that mark assembler program text */
char *asc[] = {
	"chmk", "mov", "tst", "clr", "jmp",
	0
};

/* words that mark c program text */
char *c[] = {
	"int", "char", "float", "double", "struct", "extern",
	0
};

/* words looked for after a leading '.' in assembler program text */
char *as[] = {
	"globl", "byte", "align", "text", "data", "comm",
	0
};

/* words that mark shell scripts and shell commands */
char *sh[] = {
	"fi", "elif", "esac", "done", "export",
	"readonly", "trap", "PATH", "HOME",
	0
};

/* words that mark c-shell scripts and c-shell commands */
char *csh[] = {
	"alias", "breaksw", "endsw", "foreach", "limit", "onintr",
	"repeat", "setenv", "source", "path", "home",
	0
};
37 
38 main(argc, argv)
39 char **argv;
40 {
41 	FILE *fl;
42 	register char *p;
43 	char ap[MAXPATHLEN + 1];
44 	extern char _sobuf[];
45 
46 	if (argc < 2) {
47 		fprintf(stderr, "usage: %s file ...\n", argv[0]);
48 		exit(3);
49 	}
50 
51 	if (argc>1 && argv[1][0]=='-' && argv[1][1]=='f') {
52 		if ((fl = fopen(argv[2], "r")) == NULL) {
53 			perror(argv[2]);
54 			exit(2);
55 		}
56 		while ((p = fgets(ap, sizeof ap, fl)) != NULL) {
57 			int l = strlen(p);
58 			if (l>0)
59 				p[l-1] = '\0';
60 			printf("%s:	", p);
61 			type(p);
62 			if (ifile>=0)
63 				close(ifile);
64 		}
65 		exit(1);
66 	}
67 	while(argc > 1) {
68 		printf("%s:	", argv[1]);
69 		type(argv[1]);
70 		fflush(stdout);
71 		argc--;
72 		argv++;
73 		if (ifile >= 0)
74 			close(ifile);
75 	}
76 	exit(0);
77 }
78 
79 type(file)
80 char *file;
81 {
82 	int j,nl;
83 	char ch;
84 	struct stat mbuf;
85 	char slink[MAXPATHLEN + 1];
86 
87 	ifile = -1;
88 	if (lstat(file, &mbuf) < 0) {
89 		printf("%s\n",
90 		(unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot stat");
91 		return;
92 	}
93 	switch (mbuf.st_mode & S_IFMT) {
94 
95 	case S_IFLNK:
96 		printf("symbolic link");
97 		j = readlink(file, slink, sizeof slink - 1);
98 		if (j >= 0) {
99 			slink[j] = '\0';
100 			printf(" to %s", slink);
101 		}
102 		printf("\n");
103 		return;
104 
105 	case S_IFDIR:
106 		if (mbuf.st_mode & S_ISVTX)
107 			printf("append-only ");
108 		printf("directory\n");
109 		return;
110 
111 	case S_IFCHR:
112 	case S_IFBLK:
113 		printf("%s special (%d/%d)\n",
114 		    (mbuf.st_mode&S_IFMT) == S_IFCHR ? "character" : "block",
115 		     major(mbuf.st_rdev), minor(mbuf.st_rdev));
116 		return;
117 
118 	case S_IFSOCK:
119 		printf("socket\n");
120 		return;
121 	}
122 
123 	ifile = open(file, 0);
124 	if(ifile < 0) {
125 		printf("%s\n",
126 		(unsigned)errno < sys_nerr? sys_errlist[errno]: "Cannot read");
127 		return;
128 	}
129 	in = read(ifile, buf, BUFSIZ);
130 	if(in == 0){
131 		printf("empty\n");
132 		return;
133 	}
134 	switch(*(int *)buf) {
135 
136 	case 0413:
137 		printf("demand paged ");
138 
139 	case 0410:
140 		printf("pure ");
141 		goto exec;
142 
143 	case 0411:
144 		printf("jfr or pdp-11 unix 411 executable\n");
145 		return;
146 
147 	case 0407:
148 exec:
149 		if (mbuf.st_mode & S_ISUID)
150 			printf("set-uid ");
151 		if (mbuf.st_mode & S_ISGID)
152 			printf("set-gid ");
153 		if (mbuf.st_mode & S_ISVTX)
154 			printf("sticky ");
155 		printf("executable");
156 		if(((int *)buf)[4] != 0) {
157 			printf(" not stripped");
158 			if(oldo(buf))
159 				printf(" (old format symbol table)");
160 		}
161 		printf("\n");
162 		return;
163 
164 	case 0177555:
165 		printf("very old archive\n");
166 		return;
167 
168 	case 0177545:
169 		printf("old archive\n");
170 		return;
171 
172 	case 070707:
173 		printf("cpio data\n");
174 		return;
175 	}
176 
177 	if (buf[0] == '#' && buf[1] == '!' && shellscript(buf+2, &mbuf))
178 		return;
179 	if (buf[0] == '\037' && buf[1] == '\235') {
180 		if (buf[2]&0x80)
181 			printf("block ");
182 		printf("compressed %d bit code data\n", buf[2]&0x1f);
183 		return;
184 	}
185 	if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) {
186 		printf("archive random library\n");
187 		return;
188 	}
189 	if (strncmp(buf, "!<arch>\n", 8)==0) {
190 		printf("archive\n");
191 		return;
192 	}
193 	if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
194 		lseek(ifile, -512L, 2);	/* last block */
195 		if (read(ifile, buf, BUFSIZ) > 0 && *(short *)buf == 12138) {
196 			printf("PRESS file\n");
197 			return;
198 		}
199 	}
200 	i = 0;
201 	if(ccom() == 0)goto notc;
202 	while(buf[i] == '#'){
203 		j = i;
204 		while(buf[i++] != '\n'){
205 			if(i - j > 255){
206 				printf("data\n");
207 				return;
208 			}
209 			if(i >= in)goto notc;
210 		}
211 		if(ccom() == 0)goto notc;
212 	}
213 check:
214 	if(lookup(c) == 1){
215 		while((ch = buf[i++]) != ';' && ch != '{')if(i >= in)goto notc;
216 		printf("c program text");
217 		goto outa;
218 	}
219 	nl = 0;
220 	while(buf[i] != '('){
221 		if(buf[i] <= 0)
222 			goto notas;
223 		if(buf[i] == ';'){
224 			i++;
225 			goto check;
226 		}
227 		if(buf[i++] == '\n')
228 			if(nl++ > 6)goto notc;
229 		if(i >= in)goto notc;
230 	}
231 	while(buf[i] != ')'){
232 		if(buf[i++] == '\n')
233 			if(nl++ > 6)goto notc;
234 		if(i >= in)goto notc;
235 	}
236 	while(buf[i] != '{'){
237 		if(buf[i++] == '\n')
238 			if(nl++ > 6)goto notc;
239 		if(i >= in)goto notc;
240 	}
241 	printf("c program text");
242 	goto outa;
243 notc:
244 	i = 0;
245 	while(buf[i] == 'c' || buf[i] == '#'){
246 		while(buf[i++] != '\n')if(i >= in)goto notfort;
247 	}
248 	if(lookup(fort) == 1){
249 		printf("fortran program text");
250 		goto outa;
251 	}
252 notfort:
253 	i=0;
254 	if(ascom() == 0)goto notas;
255 	j = i-1;
256 	if(buf[i] == '.'){
257 		i++;
258 		if(lookup(as) == 1){
259 			printf("assembler program text");
260 			goto outa;
261 		}
262 		else if(buf[j] == '\n' && isalpha(buf[j+2])){
263 			printf("roff, nroff, or eqn input text");
264 			goto outa;
265 		}
266 	}
267 	while(lookup(asc) == 0){
268 		if(ascom() == 0)goto notas;
269 		while(buf[i] != '\n' && buf[i++] != ':')
270 			if(i >= in)goto notas;
271 		while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t')if(i++ >= in)goto notas;
272 		j = i-1;
273 		if(buf[i] == '.'){
274 			i++;
275 			if(lookup(as) == 1){
276 				printf("assembler program text");
277 				goto outa;
278 			}
279 			else if(buf[j] == '\n' && isalpha(buf[j+2])){
280 				printf("roff, nroff, or eqn input text");
281 				goto outa;
282 			}
283 		}
284 	}
285 	printf("assembler program text");
286 	goto outa;
287 notas:
288 	for(i=0; i < in; i++)if(buf[i]&0200){
289 		if (buf[0]=='\100' && buf[1]=='\357')
290 			printf("troff (CAT) output\n");
291 		else
292 			printf("data\n");
293 		return;
294 	}
295 	if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6))) {
296 		if (mbuf.st_mode & S_ISUID)
297 			printf("set-uid ");
298 		if (mbuf.st_mode & S_ISGID)
299 			printf("set-gid ");
300 		if (mbuf.st_mode & S_ISVTX)
301 			printf("sticky ");
302 		if (shell(buf, in, sh))
303 			printf("shell script");
304 		else if (shell(buf, in, csh))
305 			printf("c-shell script");
306 		else
307 			printf("commands text");
308 	} else if (troffint(buf, in))
309 		printf("troff intermediate output text");
310 	else if (shell(buf, in, sh))
311 		printf("shell commands");
312 	else if (shell(buf, in, csh))
313 		printf("c-shell commands");
314 	else if (english(buf, in))
315 		printf("English text");
316 	else
317 		printf("ascii text");
318 outa:
319 	while(i < in)
320 		if((buf[i++]&0377) > 127){
321 			printf(" with garbage\n");
322 			return;
323 		}
324 	/* if next few lines in then read whole file looking for nulls ...
325 		while((in = read(ifile,buf,BUFSIZ)) > 0)
326 			for(i = 0; i < in; i++)
327 				if((buf[i]&0377) > 127){
328 					printf(" with garbage\n");
329 					return;
330 				}
331 		/*.... */
332 	printf("\n");
333 }
334 
335 oldo(cp)
336 char *cp;
337 {
338 	struct exec ex;
339 	struct stat stb;
340 
341 	ex = *(struct exec *)cp;
342 	if (fstat(ifile, &stb) < 0)
343 		return(0);
344 	if (N_STROFF(ex)+sizeof(off_t) > stb.st_size)
345 		return (1);
346 	return (0);
347 }
348 
349 
350 
351 troffint(bp, n)
352 char *bp;
353 int n;
354 {
355 	int k;
356 
357 	i = 0;
358 	for (k = 0; k < 6; k++) {
359 		if (lookup(troff) == 0)
360 			return(0);
361 		if (lookup(troff) == 0)
362 			return(0);
363 		while (i < n && buf[i] != '\n')
364 			i++;
365 		if (i++ >= n)
366 			return(0);
367 	}
368 	return(1);
369 }
370 lookup(tab)
371 char *tab[];
372 {
373 	char r;
374 	int k,j,l;
375 	while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n')i++;
376 	for(j=0; tab[j] != 0; j++){
377 		l=0;
378 		for(k=i; ((r=tab[j][l++]) == buf[k] && r != '\0');k++);
379 		if(r == '\0')
380 			if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t'
381 			    || buf[k] == '{' || buf[k] == '/'){
382 				i=k;
383 				return(1);
384 			}
385 	}
386 	return(0);
387 }
388 ccom(){
389 	char cc;
390 	while((cc = buf[i]) == ' ' || cc == '\t' || cc == '\n')if(i++ >= in)return(0);
391 	if(buf[i] == '/' && buf[i+1] == '*'){
392 		i += 2;
393 		while(buf[i] != '*' || buf[i+1] != '/'){
394 			if(buf[i] == '\\')i += 2;
395 			else i++;
396 			if(i >= in)return(0);
397 		}
398 		if((i += 2) >= in)return(0);
399 	}
400 	if(buf[i] == '\n')if(ccom() == 0)return(0);
401 	return(1);
402 }
403 ascom(){
404 	while(buf[i] == '/'){
405 		i++;
406 		while(buf[i++] != '\n')if(i >= in)return(0);
407 		while(buf[i] == '\n')if(i++ >= in)return(0);
408 	}
409 	return(1);
410 }
411 
/*
 * english - guess from character statistics whether text is English.
 *
 * bp points at n bytes of text.  Returns 1 when the text passes every
 * test below and 0 otherwise:
 *   - there are at least 50 bytes;
 *   - no more than a fifth of the punctuation marks . , ) % ; : ? are
 *     directly followed by something other than a blank or newline;
 *   - ';' is not more than half as frequent as 'e';
 *   - '>', '<' and '/' together are not more frequent than 'e';
 *   - vowels make up at least a fifth of the non-blank bytes;
 *   - e, t, a, i, o, n together are at least ten times as frequent as
 *     v, j, k, q, x, z together.
 * Counting is done with the 040 bit forced on, which folds upper-case
 * letters onto lower-case ones.
 *
 * Bytes are examined as unsigned values so that a byte with the high
 * bit set is simply left out of the counts instead of becoming a
 * negative index into ct[].
 */
int
english(bp, n)
char *bp;
int n;
{
# define NASC 128
	int ct[NASC], j, vow, freq, rare;
	int badpun = 0, punct = 0;
	unsigned char ch;

	if (n < 50)
		return (0); /* no point in statistics on squibs */
	for (j = 0; j < NASC; j++)
		ct[j] = 0;
	for (j = 0; j < n; j++) {
		ch = (unsigned char)bp[j];
		if (ch < NASC)
			ct[ch|040]++;
		switch (ch) {
		case '.':
		case ',':
		case ')':
		case '%':
		case ';':
		case ':':
		case '?':
			punct++;
			if (j < n-1 &&
			    bp[j+1] != ' ' &&
			    bp[j+1] != '\n')
				badpun++;
		}
	}
	if (badpun*5 > punct)
		return (0);
	vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u'];
	freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n'];
	rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z'];
	if (2*ct[';'] > ct['e'])
		return (0);
	if ((ct['>']+ct['<']+ct['/']) > ct['e'])
		return (0); /* shell file test */
	return (vow*5 >= n-ct[' '] && freq >= 10*rare);
}
450 
451 shellscript(buf, sb)
452 	char buf[];
453 	struct stat *sb;
454 {
455 	register char *tp;
456 	char *cp, *xp, *index();
457 
458 	cp = index(buf, '\n');
459 	if (cp == 0 || cp - buf > in)
460 		return (0);
461 	for (tp = buf; tp != cp && isspace(*tp); tp++)
462 		if (!isascii(*tp))
463 			return (0);
464 	for (xp = tp; tp != cp && !isspace(*tp); tp++)
465 		if (!isascii(*tp))
466 			return (0);
467 	if (tp == xp)
468 		return (0);
469 	if (sb->st_mode & S_ISUID)
470 		printf("set-uid ");
471 	if (sb->st_mode & S_ISGID)
472 		printf("set-gid ");
473 	if (strncmp(xp, "/bin/sh", tp-xp) == 0)
474 		xp = "shell";
475 	else if (strncmp(xp, "/bin/csh", tp-xp) == 0)
476 		xp = "c-shell";
477 	else
478 		*tp = '\0';
479 	printf("executable %s script\n", xp);
480 	return (1);
481 }
482 
483 shell(bp, n, tab)
484 	char *bp;
485 	int n;
486 	char *tab[];
487 {
488 
489 	i = 0;
490 	do {
491 		if (buf[i] == '#' || buf[i] == ':')
492 			while (i < n && buf[i] != '\n')
493 				i++;
494 		if (++i >= n)
495 			break;
496 		if (lookup(tab) == 1)
497 			return (1);
498 	} while (i < n);
499 	return (0);
500 }
501