xref: /openbsd/usr.bin/file/file.c (revision 1da77805)
1 /* $OpenBSD: file.c,v 1.58 2016/05/01 20:34:26 nicm Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/ioctl.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/socket.h>
24 #include <sys/stat.h>
25 #include <sys/uio.h>
26 #include <sys/wait.h>
27 
28 #include <err.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <getopt.h>
32 #include <imsg.h>
33 #include <libgen.h>
34 #include <limits.h>
35 #include <pwd.h>
36 #include <stdlib.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <time.h>
40 #include <unistd.h>
41 
42 #include "file.h"
43 #include "magic.h"
44 #include "xmalloc.h"
45 
/*
 * Request sent from the parent to the child for each file operand.  The
 * parent fills it in (see prepare_message()) and, where it could open the
 * file, passes the descriptor alongside the message.
 */
struct input_msg {
	int		idx;			/* index of the operand in argv */

	struct stat	sb;			/* result of stat/lstat/fstat */
	int		error;			/* errno from that call, or 0 */

	char		link_path[PATH_MAX];	/* symlink target, for display */
	int		link_error;		/* errno from reading the link, or 0 */
	int		link_target;		/* errno from stat of the target, or 0 */
};
57 
/*
 * Reply sent from the child to the parent once an operand has been
 * processed; idx echoes the idx of the request being acknowledged.
 */
struct input_ack {
	int		idx;
};
62 
/*
 * Per-operand working state used by the child while it classifies a file.
 */
struct input_file {
	struct magic		*m;	/* loaded magic database */
	struct input_msg	*msg;	/* request received from the parent */

	const char		*path;	/* operand as given on the command line */
	int			 fd;	/* descriptor received from parent, or -1 */

	void			*base;	/* start of the file data to examine */
	size_t			 size;	/* number of bytes available at base */
	int			 mapped;	/* nonzero if base came from mmap */
	char			*result;	/* allocated description to print */
};
76 
77 extern char	*__progname;
78 
79 __dead void	 usage(void);
80 
81 static int	 prepare_message(struct input_msg *, int, const char *);
82 static void	 send_message(struct imsgbuf *, void *, size_t, int);
83 static int	 read_message(struct imsgbuf *, struct imsg *, pid_t);
84 
85 static void	 read_link(struct input_msg *, const char *);
86 
87 static __dead void child(int, pid_t, int, char **);
88 
89 static void	 test_file(struct input_file *, size_t);
90 
91 static int	 try_stat(struct input_file *);
92 static int	 try_empty(struct input_file *);
93 static int	 try_access(struct input_file *);
94 static int	 try_text(struct input_file *);
95 static int	 try_magic(struct input_file *);
96 static int	 try_unknown(struct input_file *);
97 
/* Command-line switches; each is set by the option parser in main(). */
static int	 bflag;		/* -b: print the result without the path label */
static int	 cflag;		/* -c: dump the parsed magic file and exit */
static int	 iflag;		/* -i: report MIME types instead of descriptions */
static int	 Lflag;		/* -L: follow symlinks (-h turns this off) */
static int	 sflag;		/* -s: also read block and character specials */
static int	 Wflag;		/* -W: handed to magic_load(); presumably enables
				 * warnings - confirm against magic.h */

/* Magic database chosen by main(): its path and the stream opened on it. */
static char	*magicpath;
static FILE	*magicfp;
107 
/*
 * Long option aliases for getopt_long(); each maps onto the equivalent
 * short option.  Both --mime and --mime-type select -i.
 */
static struct option longopts[] = {
	{ "brief",	 no_argument,	NULL,	'b' },
	{ "dereference", no_argument,	NULL,	'L' },
	{ "mime",	 no_argument,	NULL,	'i' },
	{ "mime-type",	 no_argument,	NULL,	'i' },
	{ NULL,		 0,		NULL,	0 }
};
115 
116 __dead void
117 usage(void)
118 {
119 	fprintf(stderr, "usage: %s [-bchiLsW] file ...\n", __progname);
120 	exit(1);
121 }
122 
123 int
124 main(int argc, char **argv)
125 {
126 	int			 opt, pair[2], fd, idx;
127 	char			*home;
128 	struct passwd		*pw;
129 	struct imsgbuf		 ibuf;
130 	struct imsg		 imsg;
131 	struct input_msg	 msg;
132 	struct input_ack	*ack;
133 	pid_t			 pid, parent;
134 
135 	tzset();
136 
137 	for (;;) {
138 		opt = getopt_long(argc, argv, "bchiLsW", longopts, NULL);
139 		if (opt == -1)
140 			break;
141 		switch (opt) {
142 		case 'b':
143 			bflag = 1;
144 			break;
145 		case 'c':
146 			cflag = 1;
147 			break;
148 		case 'h':
149 			Lflag = 0;
150 			break;
151 		case 'i':
152 			iflag = 1;
153 			break;
154 		case 'L':
155 			Lflag = 1;
156 			break;
157 		case 's':
158 			sflag = 1;
159 			break;
160 		case 'W':
161 			Wflag = 1;
162 			break;
163 		default:
164 			usage();
165 		}
166 	}
167 	argc -= optind;
168 	argv += optind;
169 	if (cflag) {
170 		if (argc != 0)
171 			usage();
172 	} else if (argc == 0)
173 		usage();
174 
175 	magicfp = NULL;
176 	if (geteuid() != 0 && !issetugid()) {
177 		home = getenv("HOME");
178 		if (home == NULL || *home == '\0') {
179 			pw = getpwuid(getuid());
180 			if (pw != NULL)
181 				home = pw->pw_dir;
182 			else
183 				home = NULL;
184 		}
185 		if (home != NULL) {
186 			xasprintf(&magicpath, "%s/.magic", home);
187 			magicfp = fopen(magicpath, "r");
188 			if (magicfp == NULL)
189 				free(magicpath);
190 		}
191 	}
192 	if (magicfp == NULL) {
193 		magicpath = xstrdup("/etc/magic");
194 		magicfp = fopen(magicpath, "r");
195 	}
196 	if (magicfp == NULL)
197 		err(1, "%s", magicpath);
198 
199 	parent = getpid();
200 	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pair) != 0)
201 		err(1, "socketpair");
202 	switch (pid = fork()) {
203 	case -1:
204 		err(1, "fork");
205 	case 0:
206 		close(pair[0]);
207 		child(pair[1], parent, argc, argv);
208 	}
209 	close(pair[1]);
210 
211 	fclose(magicfp);
212 	magicfp = NULL;
213 
214 	if (cflag)
215 		goto wait_for_child;
216 
217 	imsg_init(&ibuf, pair[0]);
218 	for (idx = 0; idx < argc; idx++) {
219 		fd = prepare_message(&msg, idx, argv[idx]);
220 		send_message(&ibuf, &msg, sizeof msg, fd);
221 
222 		if (read_message(&ibuf, &imsg, pid) == 0)
223 			break;
224 		if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof *ack)
225 			errx(1, "message too small");
226 		ack = imsg.data;
227 		if (ack->idx != idx)
228 			errx(1, "index not expected");
229 		imsg_free(&imsg);
230 	}
231 
232 wait_for_child:
233 	close(pair[0]);
234 	while (wait(NULL) == -1 && errno != ECHILD) {
235 		if (errno != EINTR)
236 			err(1, "wait");
237 	}
238 	_exit(0); /* let the child flush */
239 }
240 
241 static int
242 prepare_message(struct input_msg *msg, int idx, const char *path)
243 {
244 	int	fd, mode, error;
245 
246 	memset(msg, 0, sizeof *msg);
247 	msg->idx = idx;
248 
249 	if (strcmp(path, "-") == 0) {
250 		if (fstat(STDIN_FILENO, &msg->sb) == -1) {
251 			msg->error = errno;
252 			return (-1);
253 		}
254 		return (STDIN_FILENO);
255 	}
256 
257 	if (Lflag)
258 		error = stat(path, &msg->sb);
259 	else
260 		error = lstat(path, &msg->sb);
261 	if (error == -1) {
262 		msg->error = errno;
263 		return (-1);
264 	}
265 
266 	/*
267 	 * pledge(2) doesn't let us pass directory file descriptors around -
268 	 * but in fact we don't need them, so just don't open directories or
269 	 * symlinks (which could be to directories).
270 	 */
271 	mode = msg->sb.st_mode;
272 	if (!S_ISDIR(mode) && !S_ISLNK(mode)) {
273 		fd = open(path, O_RDONLY|O_NONBLOCK);
274 		if (fd == -1 && (errno == ENFILE || errno == EMFILE))
275 			err(1, "open");
276 	} else
277 		fd = -1;
278 	if (S_ISLNK(mode))
279 		read_link(msg, path);
280 	return (fd);
281 
282 }
283 
/*
 * Queue msg (msglen bytes) on ibuf, attaching descriptor fd (-1 for none),
 * and flush it immediately.  Any failure is fatal.
 */
static void
send_message(struct imsgbuf *ibuf, void *msg, size_t msglen, int fd)
{
	int	rc;

	rc = imsg_compose(ibuf, -1, -1, 0, fd, msg, msglen);
	if (rc != 1)
		err(1, "imsg_compose");

	rc = imsg_flush(ibuf);
	if (rc != 0)
		err(1, "imsg_flush");
}
292 
293 static int
294 read_message(struct imsgbuf *ibuf, struct imsg *imsg, pid_t from)
295 {
296 	int	n;
297 
298 	while ((n = imsg_read(ibuf)) == -1 && errno == EAGAIN)
299 		/* nothing */ ;
300 	if (n == -1)
301 		err(1, "imsg_read");
302 	if (n == 0)
303 		return (0);
304 
305 	if ((n = imsg_get(ibuf, imsg)) == -1)
306 		err(1, "imsg_get");
307 	if (n == 0)
308 		return (0);
309 
310 	if ((pid_t)imsg->hdr.pid != from)
311 		errx(1, "PIDs don't match");
312 
313 	return (n);
314 
315 }
316 
317 static void
318 read_link(struct input_msg *msg, const char *path)
319 {
320 	struct stat	 sb;
321 	char		 lpath[PATH_MAX];
322 	char		*copy, *root;
323 	int		 used;
324 	ssize_t		 size;
325 
326 	size = readlink(path, lpath, sizeof lpath - 1);
327 	if (size == -1) {
328 		msg->link_error = errno;
329 		return;
330 	}
331 	lpath[size] = '\0';
332 
333 	if (*lpath == '/')
334 		strlcpy(msg->link_path, lpath, sizeof msg->link_path);
335 	else {
336 		copy = xstrdup(path);
337 
338 		root = dirname(copy);
339 		if (*root == '\0' || strcmp(root, ".") == 0 ||
340 		    strcmp (root, "/") == 0)
341 			strlcpy(msg->link_path, lpath, sizeof msg->link_path);
342 		else {
343 			used = snprintf(msg->link_path, sizeof msg->link_path,
344 			    "%s/%s", root, lpath);
345 			if (used < 0 || (size_t)used >= sizeof msg->link_path) {
346 				msg->link_error = ENAMETOOLONG;
347 				free(copy);
348 				return;
349 			}
350 		}
351 
352 		free(copy);
353 	}
354 
355 	if (!Lflag && stat(path, &sb) == -1)
356 		msg->link_target = errno;
357 }
358 
359 static __dead void
360 child(int fd, pid_t parent, int argc, char **argv)
361 {
362 	struct passwd		*pw;
363 	struct magic		*m;
364 	struct imsgbuf		 ibuf;
365 	struct imsg		 imsg;
366 	struct input_msg	*msg;
367 	struct input_ack	 ack;
368 	struct input_file	 inf;
369 	int			 i, idx;
370 	size_t			 len, width = 0;
371 
372 	if (pledge("stdio getpw recvfd id", NULL) == -1)
373 		err(1, "pledge");
374 
375 	if (geteuid() == 0) {
376 		pw = getpwnam(FILE_USER);
377 		if (pw == NULL)
378 			errx(1, "unknown user %s", FILE_USER);
379 		if (setgroups(1, &pw->pw_gid) != 0)
380 			err(1, "setgroups");
381 		if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0)
382 			err(1, "setresgid");
383 		if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0)
384 			err(1, "setresuid");
385 	}
386 
387 	if (pledge("stdio recvfd", NULL) == -1)
388 		err(1, "pledge");
389 
390 	m = magic_load(magicfp, magicpath, cflag || Wflag);
391 	if (cflag) {
392 		magic_dump(m);
393 		exit(0);
394 	}
395 
396 	for (i = 0; i < argc; i++) {
397 		len = strlen(argv[i]) + 1;
398 		if (len > width)
399 			width = len;
400 	}
401 
402 	imsg_init(&ibuf, fd);
403 	for (;;) {
404 		if (read_message(&ibuf, &imsg, parent) == 0)
405 			break;
406 		if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof *msg)
407 			errx(1, "message too small");
408 		msg = imsg.data;
409 
410 		idx = msg->idx;
411 		if (idx < 0 || idx >= argc)
412 			errx(1, "index out of range");
413 
414 		memset(&inf, 0, sizeof inf);
415 		inf.m = m;
416 		inf.msg = msg;
417 
418 		inf.path = argv[idx];
419 		inf.fd = imsg.fd;
420 
421 		test_file(&inf, width);
422 
423 		if (imsg.fd != -1)
424 			close(imsg.fd);
425 		imsg_free(&imsg);
426 
427 		ack.idx = idx;
428 		send_message(&ibuf, &ack, sizeof ack, -1);
429 	}
430 	exit(0);
431 }
432 
433 static void *
434 fill_buffer(int fd, size_t size, size_t *used)
435 {
436 	static void	*buffer;
437 	ssize_t		 got;
438 	size_t		 left;
439 	void		*next;
440 
441 	if (buffer == NULL)
442 		buffer = xmalloc(FILE_READ_SIZE);
443 
444 	next = buffer;
445 	left = size;
446 	while (left != 0) {
447 		got = read(fd, next, left);
448 		if (got == -1) {
449 			if (errno == EINTR)
450 				continue;
451 			return NULL;
452 		}
453 		if (got == 0)
454 			break;
455 		next = (char *)next + got;
456 		left -= got;
457 	}
458 	*used = size - left;
459 	return buffer;
460 }
461 
462 static int
463 load_file(struct input_file *inf)
464 {
465 	size_t	used;
466 
467 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
468 		return (0); /* empty file */
469 	if (inf->msg->sb.st_size == 0 || inf->msg->sb.st_size > FILE_READ_SIZE)
470 		inf->size = FILE_READ_SIZE;
471 	else
472 		inf->size = inf->msg->sb.st_size;
473 
474 	if (!S_ISREG(inf->msg->sb.st_mode))
475 		goto try_read;
476 
477 	inf->base = mmap(NULL, inf->size, PROT_READ, MAP_PRIVATE, inf->fd, 0);
478 	if (inf->base == MAP_FAILED)
479 		goto try_read;
480 	inf->mapped = 1;
481 	return (0);
482 
483 try_read:
484 	inf->base = fill_buffer(inf->fd, inf->size, &used);
485 	if (inf->base == NULL) {
486 		xasprintf(&inf->result, "cannot read '%s' (%s)", inf->path,
487 		    strerror(errno));
488 		return (1);
489 	}
490 	inf->size = used;
491 	return (0);
492 }
493 
494 static int
495 try_stat(struct input_file *inf)
496 {
497 	if (inf->msg->error != 0) {
498 		xasprintf(&inf->result, "cannot stat '%s' (%s)", inf->path,
499 		    strerror(inf->msg->error));
500 		return (1);
501 	}
502 	if (sflag || strcmp(inf->path, "-") == 0) {
503 		switch (inf->msg->sb.st_mode & S_IFMT) {
504 		case S_IFIFO:
505 			if (strcmp(inf->path, "-") != 0)
506 				break;
507 		case S_IFBLK:
508 		case S_IFCHR:
509 		case S_IFREG:
510 			return (0);
511 		}
512 	}
513 
514 	if (iflag && (inf->msg->sb.st_mode & S_IFMT) != S_IFREG) {
515 		xasprintf(&inf->result, "application/x-not-regular-file");
516 		return (1);
517 	}
518 
519 	switch (inf->msg->sb.st_mode & S_IFMT) {
520 	case S_IFDIR:
521 		xasprintf(&inf->result, "directory");
522 		return (1);
523 	case S_IFLNK:
524 		if (inf->msg->link_error != 0) {
525 			xasprintf(&inf->result, "unreadable symlink '%s' (%s)",
526 			    inf->path, strerror(inf->msg->link_error));
527 			return (1);
528 		}
529 		if (inf->msg->link_target == ELOOP)
530 			xasprintf(&inf->result, "symbolic link in a loop");
531 		else if (inf->msg->link_target != 0) {
532 			xasprintf(&inf->result, "broken symbolic link to '%s'",
533 			    inf->msg->link_path);
534 		} else {
535 			xasprintf(&inf->result, "symbolic link to '%s'",
536 			    inf->msg->link_path);
537 		}
538 		return (1);
539 	case S_IFSOCK:
540 		xasprintf(&inf->result, "socket");
541 		return (1);
542 	case S_IFBLK:
543 		xasprintf(&inf->result, "block special (%ld/%ld)",
544 		    (long)major(inf->msg->sb.st_rdev),
545 		    (long)minor(inf->msg->sb.st_rdev));
546 		return (1);
547 	case S_IFCHR:
548 		xasprintf(&inf->result, "character special (%ld/%ld)",
549 		    (long)major(inf->msg->sb.st_rdev),
550 		    (long)minor(inf->msg->sb.st_rdev));
551 		return (1);
552 	case S_IFIFO:
553 		xasprintf(&inf->result, "fifo (named pipe)");
554 		return (1);
555 	}
556 	return (0);
557 }
558 
559 static int
560 try_empty(struct input_file *inf)
561 {
562 	if (inf->size != 0)
563 		return (0);
564 
565 	if (iflag)
566 		xasprintf(&inf->result, "application/x-empty");
567 	else
568 		xasprintf(&inf->result, "empty");
569 	return (1);
570 }
571 
572 static int
573 try_access(struct input_file *inf)
574 {
575 	char tmp[256] = "";
576 
577 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
578 		return (0); /* empty file */
579 	if (inf->fd != -1)
580 		return (0);
581 
582 	if (inf->msg->sb.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))
583 		strlcat(tmp, "writable, ", sizeof tmp);
584 	if (inf->msg->sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))
585 		strlcat(tmp, "executable, ", sizeof tmp);
586 	if (S_ISREG(inf->msg->sb.st_mode))
587 		strlcat(tmp, "regular file, ", sizeof tmp);
588 	strlcat(tmp, "no read permission", sizeof tmp);
589 
590 	inf->result = xstrdup(tmp);
591 	return (1);
592 }
593 
594 static int
595 try_text(struct input_file *inf)
596 {
597 	const char	*type, *s;
598 	int		 flags;
599 
600 	flags = MAGIC_TEST_TEXT;
601 	if (iflag)
602 		flags |= MAGIC_TEST_MIME;
603 
604 	type = text_get_type(inf->base, inf->size);
605 	if (type == NULL)
606 		return (0);
607 
608 	s = magic_test(inf->m, inf->base, inf->size, flags);
609 	if (s != NULL) {
610 		inf->result = xstrdup(s);
611 		return (1);
612 	}
613 
614 	s = text_try_words(inf->base, inf->size, flags);
615 	if (s != NULL) {
616 		if (iflag)
617 			inf->result = xstrdup(s);
618 		else
619 			xasprintf(&inf->result, "%s %s text", type, s);
620 		return (1);
621 	}
622 
623 	if (iflag)
624 		inf->result = xstrdup("text/plain");
625 	else
626 		xasprintf(&inf->result, "%s text", type);
627 	return (1);
628 }
629 
630 static int
631 try_magic(struct input_file *inf)
632 {
633 	const char	*s;
634 	int		 flags;
635 
636 	flags = 0;
637 	if (iflag)
638 		flags |= MAGIC_TEST_MIME;
639 
640 	s = magic_test(inf->m, inf->base, inf->size, flags);
641 	if (s != NULL) {
642 		inf->result = xstrdup(s);
643 		return (1);
644 	}
645 	return (0);
646 }
647 
648 static int
649 try_unknown(struct input_file *inf)
650 {
651 	if (iflag)
652 		xasprintf(&inf->result, "application/x-not-regular-file");
653 	else
654 		xasprintf(&inf->result, "data");
655 	return (1);
656 }
657 
658 static void
659 test_file(struct input_file *inf, size_t width)
660 {
661 	char	*label;
662 	int	 stop;
663 
664 	stop = 0;
665 	if (!stop)
666 		stop = try_stat(inf);
667 	if (!stop)
668 		stop = try_access(inf);
669 	if (!stop)
670 		stop = load_file(inf);
671 	if (!stop)
672 		stop = try_empty(inf);
673 	if (!stop)
674 		stop = try_magic(inf);
675 	if (!stop)
676 		stop = try_text(inf);
677 	if (!stop)
678 		stop = try_unknown(inf);
679 
680 	if (bflag)
681 		printf("%s\n", inf->result);
682 	else {
683 		if (strcmp(inf->path, "-") == 0)
684 			xasprintf(&label, "/dev/stdin:");
685 		else
686 			xasprintf(&label, "%s:", inf->path);
687 		printf("%-*s %s\n", (int)width, label, inf->result);
688 		free(label);
689 	}
690 	free(inf->result);
691 
692 	if (inf->mapped && inf->base != NULL)
693 		munmap(inf->base, inf->size);
694 }
695