xref: /openbsd/usr.bin/file/file.c (revision d8e84ae2)
1 /* $OpenBSD: file.c,v 1.59 2017/04/18 14:16:48 nicm Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/ioctl.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/socket.h>
24 #include <sys/stat.h>
25 #include <sys/uio.h>
26 #include <sys/wait.h>
27 
28 #include <err.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <getopt.h>
32 #include <imsg.h>
33 #include <libgen.h>
34 #include <limits.h>
35 #include <pwd.h>
36 #include <stdlib.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <time.h>
40 #include <unistd.h>
41 
42 #include "file.h"
43 #include "magic.h"
44 #include "xmalloc.h"
45 
/*
 * Description of one input file, built by the parent in
 * prepare_message() and sent to the child (together with an open
 * descriptor when there is one).
 */
struct input_msg {
	int idx;			/* index of the file in argv */

	struct stat sb;			/* stat/lstat/fstat result */
	int error;			/* errno if that call failed, else 0 */

	char link_path[PATH_MAX];	/* symlink target, see read_link() */
	int link_error;			/* errno from reading the link, else 0 */
	int link_target;		/* errno from stat() on the link, else 0 */
};
56 
/*
 * Acknowledgement sent by the child once a file has been handled; the
 * parent checks that idx matches the file it just sent.
 */
struct input_ack {
	int idx;	/* index of the file that was processed */
};
60 
/*
 * Working state used by the child while it identifies a single file.
 */
struct input_file {
	struct magic *m;		/* loaded magic database */
	struct input_msg *msg;		/* message received for this file */

	const char *path;		/* name as given on the command line */
	int fd;				/* descriptor received, or -1 */

	void *base;			/* file contents (mapped or read) */
	size_t size;			/* number of bytes at base */
	int mapped;			/* non-zero when base came from mmap() */
	char *result;			/* allocated description to print */
};
73 
74 extern char	*__progname;
75 
76 __dead void	 usage(void);
77 
78 static int	 prepare_message(struct input_msg *, int, const char *);
79 static void	 send_message(struct imsgbuf *, void *, size_t, int);
80 static int	 read_message(struct imsgbuf *, struct imsg *, pid_t);
81 
82 static void	 read_link(struct input_msg *, const char *);
83 
84 static __dead void child(int, pid_t, int, char **);
85 
86 static void	 test_file(struct input_file *, size_t);
87 
88 static int	 try_stat(struct input_file *);
89 static int	 try_empty(struct input_file *);
90 static int	 try_access(struct input_file *);
91 static int	 try_text(struct input_file *);
92 static int	 try_magic(struct input_file *);
93 static int	 try_unknown(struct input_file *);
94 
95 static int	 bflag;
96 static int	 cflag;
97 static int	 iflag;
98 static int	 Lflag;
99 static int	 sflag;
100 static int	 Wflag;
101 
102 static char	*magicpath;
103 static FILE	*magicfp;
104 
/*
 * Long option names accepted by getopt_long() in main(); each one maps
 * onto the short option given in the last column.
 */
static struct option longopts[] = {
	{ "brief", no_argument, NULL, 'b' },
	{ "dereference", no_argument, NULL, 'L' },
	{ "mime", no_argument, NULL, 'i' },
	{ "mime-type", no_argument, NULL, 'i' },
	{ NULL, 0, NULL, 0 }	/* terminator */
};
112 
113 __dead void
114 usage(void)
115 {
116 	fprintf(stderr, "usage: %s [-bchiLsW] file ...\n", __progname);
117 	exit(1);
118 }
119 
120 int
121 main(int argc, char **argv)
122 {
123 	int			 opt, pair[2], fd, idx;
124 	char			*home;
125 	struct passwd		*pw;
126 	struct imsgbuf		 ibuf;
127 	struct imsg		 imsg;
128 	struct input_msg	 msg;
129 	struct input_ack	*ack;
130 	pid_t			 pid, parent;
131 
132 	tzset();
133 
134 	for (;;) {
135 		opt = getopt_long(argc, argv, "bchiLsW", longopts, NULL);
136 		if (opt == -1)
137 			break;
138 		switch (opt) {
139 		case 'b':
140 			bflag = 1;
141 			break;
142 		case 'c':
143 			cflag = 1;
144 			break;
145 		case 'h':
146 			Lflag = 0;
147 			break;
148 		case 'i':
149 			iflag = 1;
150 			break;
151 		case 'L':
152 			Lflag = 1;
153 			break;
154 		case 's':
155 			sflag = 1;
156 			break;
157 		case 'W':
158 			Wflag = 1;
159 			break;
160 		default:
161 			usage();
162 		}
163 	}
164 	argc -= optind;
165 	argv += optind;
166 	if (cflag) {
167 		if (argc != 0)
168 			usage();
169 	} else if (argc == 0)
170 		usage();
171 
172 	magicfp = NULL;
173 	if (geteuid() != 0 && !issetugid()) {
174 		home = getenv("HOME");
175 		if (home == NULL || *home == '\0') {
176 			pw = getpwuid(getuid());
177 			if (pw != NULL)
178 				home = pw->pw_dir;
179 			else
180 				home = NULL;
181 		}
182 		if (home != NULL) {
183 			xasprintf(&magicpath, "%s/.magic", home);
184 			magicfp = fopen(magicpath, "r");
185 			if (magicfp == NULL)
186 				free(magicpath);
187 		}
188 	}
189 	if (magicfp == NULL) {
190 		magicpath = xstrdup("/etc/magic");
191 		magicfp = fopen(magicpath, "r");
192 	}
193 	if (magicfp == NULL)
194 		err(1, "%s", magicpath);
195 
196 	parent = getpid();
197 	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pair) != 0)
198 		err(1, "socketpair");
199 	switch (pid = fork()) {
200 	case -1:
201 		err(1, "fork");
202 	case 0:
203 		close(pair[0]);
204 		child(pair[1], parent, argc, argv);
205 	}
206 	close(pair[1]);
207 
208 	fclose(magicfp);
209 	magicfp = NULL;
210 
211 	if (cflag)
212 		goto wait_for_child;
213 
214 	imsg_init(&ibuf, pair[0]);
215 	for (idx = 0; idx < argc; idx++) {
216 		fd = prepare_message(&msg, idx, argv[idx]);
217 		send_message(&ibuf, &msg, sizeof msg, fd);
218 
219 		if (read_message(&ibuf, &imsg, pid) == 0)
220 			break;
221 		if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof *ack)
222 			errx(1, "message too small");
223 		ack = imsg.data;
224 		if (ack->idx != idx)
225 			errx(1, "index not expected");
226 		imsg_free(&imsg);
227 	}
228 
229 wait_for_child:
230 	close(pair[0]);
231 	while (wait(NULL) == -1 && errno != ECHILD) {
232 		if (errno != EINTR)
233 			err(1, "wait");
234 	}
235 	_exit(0); /* let the child flush */
236 }
237 
238 static int
239 prepare_message(struct input_msg *msg, int idx, const char *path)
240 {
241 	int	fd, mode, error;
242 
243 	memset(msg, 0, sizeof *msg);
244 	msg->idx = idx;
245 
246 	if (strcmp(path, "-") == 0) {
247 		if (fstat(STDIN_FILENO, &msg->sb) == -1) {
248 			msg->error = errno;
249 			return (-1);
250 		}
251 		return (STDIN_FILENO);
252 	}
253 
254 	if (Lflag)
255 		error = stat(path, &msg->sb);
256 	else
257 		error = lstat(path, &msg->sb);
258 	if (error == -1) {
259 		msg->error = errno;
260 		return (-1);
261 	}
262 
263 	/*
264 	 * pledge(2) doesn't let us pass directory file descriptors around -
265 	 * but in fact we don't need them, so just don't open directories or
266 	 * symlinks (which could be to directories).
267 	 */
268 	mode = msg->sb.st_mode;
269 	if (!S_ISDIR(mode) && !S_ISLNK(mode)) {
270 		fd = open(path, O_RDONLY|O_NONBLOCK);
271 		if (fd == -1 && (errno == ENFILE || errno == EMFILE))
272 			err(1, "open");
273 	} else
274 		fd = -1;
275 	if (S_ISLNK(mode))
276 		read_link(msg, path);
277 	return (fd);
278 
279 }
280 
/*
 * Queue msglen bytes at msg, with the optional descriptor fd (-1 for
 * none), on ibuf and flush the queue straight away.  Any failure is
 * fatal.
 */
static void
send_message(struct imsgbuf *ibuf, void *msg, size_t msglen, int fd)
{
	int	rc;

	rc = imsg_compose(ibuf, -1, -1, 0, fd, msg, msglen);
	if (rc != 1)
		err(1, "imsg_compose");

	rc = imsg_flush(ibuf);
	if (rc != 0)
		err(1, "imsg_flush");
}
289 
290 static int
291 read_message(struct imsgbuf *ibuf, struct imsg *imsg, pid_t from)
292 {
293 	int	n;
294 
295 	while ((n = imsg_read(ibuf)) == -1 && errno == EAGAIN)
296 		/* nothing */ ;
297 	if (n == -1)
298 		err(1, "imsg_read");
299 	if (n == 0)
300 		return (0);
301 
302 	if ((n = imsg_get(ibuf, imsg)) == -1)
303 		err(1, "imsg_get");
304 	if (n == 0)
305 		return (0);
306 
307 	if ((pid_t)imsg->hdr.pid != from)
308 		errx(1, "PIDs don't match");
309 
310 	return (n);
311 
312 }
313 
314 static void
315 read_link(struct input_msg *msg, const char *path)
316 {
317 	struct stat	 sb;
318 	char		 lpath[PATH_MAX];
319 	char		*copy, *root;
320 	int		 used;
321 	ssize_t		 size;
322 
323 	size = readlink(path, lpath, sizeof lpath - 1);
324 	if (size == -1) {
325 		msg->link_error = errno;
326 		return;
327 	}
328 	lpath[size] = '\0';
329 
330 	if (*lpath == '/')
331 		strlcpy(msg->link_path, lpath, sizeof msg->link_path);
332 	else {
333 		copy = xstrdup(path);
334 
335 		root = dirname(copy);
336 		if (*root == '\0' || strcmp(root, ".") == 0 ||
337 		    strcmp (root, "/") == 0)
338 			strlcpy(msg->link_path, lpath, sizeof msg->link_path);
339 		else {
340 			used = snprintf(msg->link_path, sizeof msg->link_path,
341 			    "%s/%s", root, lpath);
342 			if (used < 0 || (size_t)used >= sizeof msg->link_path) {
343 				msg->link_error = ENAMETOOLONG;
344 				free(copy);
345 				return;
346 			}
347 		}
348 
349 		free(copy);
350 	}
351 
352 	if (!Lflag && stat(path, &sb) == -1)
353 		msg->link_target = errno;
354 }
355 
356 static __dead void
357 child(int fd, pid_t parent, int argc, char **argv)
358 {
359 	struct passwd		*pw;
360 	struct magic		*m;
361 	struct imsgbuf		 ibuf;
362 	struct imsg		 imsg;
363 	struct input_msg	*msg;
364 	struct input_ack	 ack;
365 	struct input_file	 inf;
366 	int			 i, idx;
367 	size_t			 len, width = 0;
368 
369 	if (pledge("stdio getpw recvfd id", NULL) == -1)
370 		err(1, "pledge");
371 
372 	if (geteuid() == 0) {
373 		pw = getpwnam(FILE_USER);
374 		if (pw == NULL)
375 			errx(1, "unknown user %s", FILE_USER);
376 		if (setgroups(1, &pw->pw_gid) != 0)
377 			err(1, "setgroups");
378 		if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0)
379 			err(1, "setresgid");
380 		if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0)
381 			err(1, "setresuid");
382 	}
383 
384 	if (pledge("stdio recvfd", NULL) == -1)
385 		err(1, "pledge");
386 
387 	m = magic_load(magicfp, magicpath, cflag || Wflag);
388 	if (cflag) {
389 		magic_dump(m);
390 		exit(0);
391 	}
392 
393 	for (i = 0; i < argc; i++) {
394 		len = strlen(argv[i]) + 1;
395 		if (len > width)
396 			width = len;
397 	}
398 
399 	imsg_init(&ibuf, fd);
400 	for (;;) {
401 		if (read_message(&ibuf, &imsg, parent) == 0)
402 			break;
403 		if (imsg.hdr.len != IMSG_HEADER_SIZE + sizeof *msg)
404 			errx(1, "message too small");
405 		msg = imsg.data;
406 
407 		idx = msg->idx;
408 		if (idx < 0 || idx >= argc)
409 			errx(1, "index out of range");
410 
411 		memset(&inf, 0, sizeof inf);
412 		inf.m = m;
413 		inf.msg = msg;
414 
415 		inf.path = argv[idx];
416 		inf.fd = imsg.fd;
417 
418 		test_file(&inf, width);
419 
420 		if (imsg.fd != -1)
421 			close(imsg.fd);
422 		imsg_free(&imsg);
423 
424 		ack.idx = idx;
425 		send_message(&ibuf, &ack, sizeof ack, -1);
426 	}
427 	exit(0);
428 }
429 
430 static void *
431 fill_buffer(int fd, size_t size, size_t *used)
432 {
433 	static void	*buffer;
434 	ssize_t		 got;
435 	size_t		 left;
436 	void		*next;
437 
438 	if (buffer == NULL)
439 		buffer = xmalloc(FILE_READ_SIZE);
440 
441 	next = buffer;
442 	left = size;
443 	while (left != 0) {
444 		got = read(fd, next, left);
445 		if (got == -1) {
446 			if (errno == EINTR)
447 				continue;
448 			return (NULL);
449 		}
450 		if (got == 0)
451 			break;
452 		next = (char *)next + got;
453 		left -= got;
454 	}
455 	*used = size - left;
456 	return (buffer);
457 }
458 
459 static int
460 load_file(struct input_file *inf)
461 {
462 	size_t	used;
463 
464 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
465 		return (0); /* empty file */
466 	if (inf->msg->sb.st_size == 0 || inf->msg->sb.st_size > FILE_READ_SIZE)
467 		inf->size = FILE_READ_SIZE;
468 	else
469 		inf->size = inf->msg->sb.st_size;
470 
471 	if (!S_ISREG(inf->msg->sb.st_mode))
472 		goto try_read;
473 
474 	inf->base = mmap(NULL, inf->size, PROT_READ, MAP_PRIVATE, inf->fd, 0);
475 	if (inf->base == MAP_FAILED)
476 		goto try_read;
477 	inf->mapped = 1;
478 	return (0);
479 
480 try_read:
481 	inf->base = fill_buffer(inf->fd, inf->size, &used);
482 	if (inf->base == NULL) {
483 		xasprintf(&inf->result, "cannot read '%s' (%s)", inf->path,
484 		    strerror(errno));
485 		return (1);
486 	}
487 	inf->size = used;
488 	return (0);
489 }
490 
491 static int
492 try_stat(struct input_file *inf)
493 {
494 	if (inf->msg->error != 0) {
495 		xasprintf(&inf->result, "cannot stat '%s' (%s)", inf->path,
496 		    strerror(inf->msg->error));
497 		return (1);
498 	}
499 	if (sflag || strcmp(inf->path, "-") == 0) {
500 		switch (inf->msg->sb.st_mode & S_IFMT) {
501 		case S_IFIFO:
502 			if (strcmp(inf->path, "-") != 0)
503 				break;
504 		case S_IFBLK:
505 		case S_IFCHR:
506 		case S_IFREG:
507 			return (0);
508 		}
509 	}
510 
511 	if (iflag && (inf->msg->sb.st_mode & S_IFMT) != S_IFREG) {
512 		xasprintf(&inf->result, "application/x-not-regular-file");
513 		return (1);
514 	}
515 
516 	switch (inf->msg->sb.st_mode & S_IFMT) {
517 	case S_IFDIR:
518 		xasprintf(&inf->result, "directory");
519 		return (1);
520 	case S_IFLNK:
521 		if (inf->msg->link_error != 0) {
522 			xasprintf(&inf->result, "unreadable symlink '%s' (%s)",
523 			    inf->path, strerror(inf->msg->link_error));
524 			return (1);
525 		}
526 		if (inf->msg->link_target == ELOOP)
527 			xasprintf(&inf->result, "symbolic link in a loop");
528 		else if (inf->msg->link_target != 0) {
529 			xasprintf(&inf->result, "broken symbolic link to '%s'",
530 			    inf->msg->link_path);
531 		} else {
532 			xasprintf(&inf->result, "symbolic link to '%s'",
533 			    inf->msg->link_path);
534 		}
535 		return (1);
536 	case S_IFSOCK:
537 		xasprintf(&inf->result, "socket");
538 		return (1);
539 	case S_IFBLK:
540 		xasprintf(&inf->result, "block special (%ld/%ld)",
541 		    (long)major(inf->msg->sb.st_rdev),
542 		    (long)minor(inf->msg->sb.st_rdev));
543 		return (1);
544 	case S_IFCHR:
545 		xasprintf(&inf->result, "character special (%ld/%ld)",
546 		    (long)major(inf->msg->sb.st_rdev),
547 		    (long)minor(inf->msg->sb.st_rdev));
548 		return (1);
549 	case S_IFIFO:
550 		xasprintf(&inf->result, "fifo (named pipe)");
551 		return (1);
552 	}
553 	return (0);
554 }
555 
556 static int
557 try_empty(struct input_file *inf)
558 {
559 	if (inf->size != 0)
560 		return (0);
561 
562 	if (iflag)
563 		xasprintf(&inf->result, "application/x-empty");
564 	else
565 		xasprintf(&inf->result, "empty");
566 	return (1);
567 }
568 
569 static int
570 try_access(struct input_file *inf)
571 {
572 	char tmp[256] = "";
573 
574 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
575 		return (0); /* empty file */
576 	if (inf->fd != -1)
577 		return (0);
578 
579 	if (inf->msg->sb.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))
580 		strlcat(tmp, "writable, ", sizeof tmp);
581 	if (inf->msg->sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))
582 		strlcat(tmp, "executable, ", sizeof tmp);
583 	if (S_ISREG(inf->msg->sb.st_mode))
584 		strlcat(tmp, "regular file, ", sizeof tmp);
585 	strlcat(tmp, "no read permission", sizeof tmp);
586 
587 	inf->result = xstrdup(tmp);
588 	return (1);
589 }
590 
591 static int
592 try_text(struct input_file *inf)
593 {
594 	const char	*type, *s;
595 	int		 flags;
596 
597 	flags = MAGIC_TEST_TEXT;
598 	if (iflag)
599 		flags |= MAGIC_TEST_MIME;
600 
601 	type = text_get_type(inf->base, inf->size);
602 	if (type == NULL)
603 		return (0);
604 
605 	s = magic_test(inf->m, inf->base, inf->size, flags);
606 	if (s != NULL) {
607 		inf->result = xstrdup(s);
608 		return (1);
609 	}
610 
611 	s = text_try_words(inf->base, inf->size, flags);
612 	if (s != NULL) {
613 		if (iflag)
614 			inf->result = xstrdup(s);
615 		else
616 			xasprintf(&inf->result, "%s %s text", type, s);
617 		return (1);
618 	}
619 
620 	if (iflag)
621 		inf->result = xstrdup("text/plain");
622 	else
623 		xasprintf(&inf->result, "%s text", type);
624 	return (1);
625 }
626 
627 static int
628 try_magic(struct input_file *inf)
629 {
630 	const char	*s;
631 	int		 flags;
632 
633 	flags = 0;
634 	if (iflag)
635 		flags |= MAGIC_TEST_MIME;
636 
637 	s = magic_test(inf->m, inf->base, inf->size, flags);
638 	if (s != NULL) {
639 		inf->result = xstrdup(s);
640 		return (1);
641 	}
642 	return (0);
643 }
644 
645 static int
646 try_unknown(struct input_file *inf)
647 {
648 	if (iflag)
649 		xasprintf(&inf->result, "application/x-not-regular-file");
650 	else
651 		xasprintf(&inf->result, "data");
652 	return (1);
653 }
654 
655 static void
656 test_file(struct input_file *inf, size_t width)
657 {
658 	char	*label;
659 	int	 stop;
660 
661 	stop = 0;
662 	if (!stop)
663 		stop = try_stat(inf);
664 	if (!stop)
665 		stop = try_access(inf);
666 	if (!stop)
667 		stop = load_file(inf);
668 	if (!stop)
669 		stop = try_empty(inf);
670 	if (!stop)
671 		stop = try_magic(inf);
672 	if (!stop)
673 		stop = try_text(inf);
674 	if (!stop)
675 		stop = try_unknown(inf);
676 
677 	if (bflag)
678 		printf("%s\n", inf->result);
679 	else {
680 		if (strcmp(inf->path, "-") == 0)
681 			xasprintf(&label, "/dev/stdin:");
682 		else
683 			xasprintf(&label, "%s:", inf->path);
684 		printf("%-*s %s\n", (int)width, label, inf->result);
685 		free(label);
686 	}
687 	free(inf->result);
688 
689 	if (inf->mapped && inf->base != NULL)
690 		munmap(inf->base, inf->size);
691 }
692