xref: /openbsd/usr.bin/file/file.c (revision 0e59d0d1)
1 /* $OpenBSD: file.c,v 1.74 2024/11/21 13:35:20 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER
15  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
16  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 #include <sys/ioctl.h>
21 #include <sys/mman.h>
22 #include <sys/queue.h>
23 #include <sys/socket.h>
24 #include <sys/stat.h>
25 #include <sys/uio.h>
26 #include <sys/wait.h>
27 
28 #include <err.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <getopt.h>
32 #include <imsg.h>
33 #include <libgen.h>
34 #include <limits.h>
35 #include <pwd.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 
41 #include "file.h"
42 #include "magic.h"
43 #include "xmalloc.h"
44 
/*
 * Message sent from the parent to the child for each file operand. It
 * carries the results of the parent's stat and readlink calls; any open
 * file descriptor is passed alongside the message, not inside it.
 */
struct input_msg {
	int		idx;		/* index of the operand in argv */

	struct stat	sb;		/* result of stat, lstat or fstat */
	int		error;		/* errno from a failed stat, else 0 */

	char		link_path[PATH_MAX];	/* symlink target path */
	int		link_error;	/* errno if the link was unreadable */
	int		link_target;	/* errno from stat through the link */
};
55 
/*
 * Reply sent from the child to the parent once a file has been processed.
 * The parent checks that idx matches the index it just sent.
 */
struct input_ack {
	int		idx;
};
59 
/* Per-file working state used by the child while testing one file. */
struct input_file {
	struct magic		*m;	/* magic data from magic_load() */
	struct input_msg	*msg;	/* message received for this file */

	const char		*path;	/* operand as given in argv */
	int			 fd;	/* descriptor from the parent, or -1 */

	void			*base;	/* file contents (mmap or read buffer) */
	size_t			 size;	/* number of bytes available at base */
	int			 mapped;	/* nonzero if base came from mmap */
	char			*result;	/* allocated description to print */
};
72 
extern char	*__progname;

__dead void	 usage(void);

/* Message passing between the parent and the child. */
static int	 prepare_message(struct input_msg *, int, const char *);
static void	 send_message(struct imsgbuf *, void *, size_t, int);
static int	 read_message(struct imsgbuf *, struct imsg *, pid_t);

static void	 read_link(struct input_msg *, const char *);

/* Entry point of the forked child; never returns. */
static __dead void child(int, pid_t, int, char **);

static void	 test_file(struct input_file *, size_t);

/*
 * Individual tests run by test_file(). Each returns nonzero once it has
 * set the result and no further tests are needed.
 */
static int	 try_stat(struct input_file *);
static int	 try_empty(struct input_file *);
static int	 try_access(struct input_file *);
static int	 try_text(struct input_file *);
static int	 try_magic(struct input_file *);
static int	 try_unknown(struct input_file *);

/* Command line flags, set in main(). */
static int	 bflag;		/* -b: print the result without the file name */
static int	 cflag;		/* -c: dump the loaded magic and exit */
static int	 iflag;		/* -i: report MIME type strings */
static int	 Lflag;		/* -L sets, -h clears: stat through symlinks */
static int	 sflag;		/* -s: examine contents of special files too */
static int	 Wflag;		/* -W: passed on to magic_load() */

/* Magic file in use: ~/.magic when it can be opened, else /etc/magic. */
static char	*magicpath;
static FILE	*magicfp;
103 
/* Long option names; each maps onto the equivalent short option letter. */
static struct option longopts[] = {
	{ "brief",       no_argument, NULL, 'b' },
	{ "dereference", no_argument, NULL, 'L' },
	{ "mime",        no_argument, NULL, 'i' },
	{ "mime-type",   no_argument, NULL, 'i' },
	{ NULL,          0,           NULL, 0   }
};
111 
112 __dead void
usage(void)113 usage(void)
114 {
115 	fprintf(stderr, "usage: %s [-bchiLsW] file ...\n", __progname);
116 	exit(1);
117 }
118 
/*
 * Parent process. Parses the options, opens the magic file, forks the
 * child and then, for each file operand, sends the child one message
 * (stat data plus an open descriptor where there is one) and waits for
 * its acknowledgement before moving on to the next operand.
 */
int
main(int argc, char **argv)
{
	int			 opt, pair[2], fd, idx;
	char			*home;
	struct passwd		*pw;
	struct imsgbuf		 ibuf;
	struct imsg		 imsg;
	struct input_msg	 msg;
	struct input_ack	 ack;
	pid_t			 pid, parent;

	/* Called before the first pledge(2) below. */
	tzset();

	for (;;) {
		opt = getopt_long(argc, argv, "bchiLsW", longopts, NULL);
		if (opt == -1)
			break;
		switch (opt) {
		case 'b':
			bflag = 1;
			break;
		case 'c':
			cflag = 1;
			break;
		case 'h':
			Lflag = 0;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'L':
			Lflag = 1;
			break;
		case 's':
			sflag = 1;
			break;
		case 'W':
			Wflag = 1;
			break;
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;
	/* -c takes no operands; every other mode needs at least one. */
	if (cflag) {
		if (argc != 0)
			usage();
	} else if (argc == 0)
		usage();

	if (pledge("stdio rpath getpw recvfd sendfd id proc", NULL) == -1)
		err(1, "pledge");

	/*
	 * Only try ~/.magic when not running as root and not setuid or
	 * setgid. HOME is used if set and non-empty, otherwise the home
	 * directory from the password database.
	 */
	magicfp = NULL;
	if (geteuid() != 0 && !issetugid()) {
		home = getenv("HOME");
		if (home == NULL || *home == '\0') {
			pw = getpwuid(getuid());
			if (pw != NULL)
				home = pw->pw_dir;
			else
				home = NULL;
		}
		if (home != NULL) {
			xasprintf(&magicpath, "%s/.magic", home);
			magicfp = fopen(magicpath, "r");
			if (magicfp == NULL)
				free(magicpath);
		}
	}
	/* Fall back to the system magic file. */
	if (magicfp == NULL) {
		magicpath = xstrdup("/etc/magic");
		magicfp = fopen(magicpath, "r");
	}
	if (magicfp == NULL)
		err(1, "%s", magicpath);

	/* The child gets pair[1] and never returns; the parent keeps pair[0]. */
	parent = getpid();
	if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, pair) != 0)
		err(1, "socketpair");
	switch (pid = fork()) {
	case -1:
		err(1, "fork");
	case 0:
		close(pair[0]);
		child(pair[1], parent, argc, argv);
	}
	close(pair[1]);

	if (pledge("stdio rpath sendfd", NULL) == -1)
		err(1, "pledge");

	/* The magic file is loaded by the child; the parent is done with it. */
	fclose(magicfp);
	magicfp = NULL;

	/* With -c there are no files to send, only the child to wait for. */
	if (cflag)
		goto wait_for_child;

	if (imsgbuf_init(&ibuf, pair[0]) == -1)
		err(1, "imsgbuf_init");
	imsgbuf_allow_fdpass(&ibuf);
	for (idx = 0; idx < argc; idx++) {
		fd = prepare_message(&msg, idx, argv[idx]);
		send_message(&ibuf, &msg, sizeof msg, fd);

		/* Stop if no acknowledgement could be read from the child. */
		if (read_message(&ibuf, &imsg, pid) == 0)
			break;
		if (imsg_get_data(&imsg, &ack, sizeof ack) == -1)
			err(1, "bad message");
		if (ack.idx != idx)
			errx(1, "index not expected");
		imsg_free(&imsg);
	}

wait_for_child:
	close(pair[0]);
	/* Retry on EINTR; stop once there are no children left (ECHILD). */
	while (wait(NULL) == -1 && errno != ECHILD) {
		if (errno != EINTR)
			err(1, "wait");
	}
	_exit(0); /* let the child flush */
}
243 
244 static int
prepare_message(struct input_msg * msg,int idx,const char * path)245 prepare_message(struct input_msg *msg, int idx, const char *path)
246 {
247 	int	fd, mode, error;
248 
249 	memset(msg, 0, sizeof *msg);
250 	msg->idx = idx;
251 
252 	if (strcmp(path, "-") == 0) {
253 		if (fstat(STDIN_FILENO, &msg->sb) == -1) {
254 			msg->error = errno;
255 			return (-1);
256 		}
257 		return (STDIN_FILENO);
258 	}
259 
260 	if (Lflag)
261 		error = stat(path, &msg->sb);
262 	else
263 		error = lstat(path, &msg->sb);
264 	if (error == -1) {
265 		msg->error = errno;
266 		return (-1);
267 	}
268 
269 	/*
270 	 * pledge(2) doesn't let us pass directory file descriptors around -
271 	 * but in fact we don't need them, so just don't open directories or
272 	 * symlinks (which could be to directories).
273 	 */
274 	mode = msg->sb.st_mode;
275 	if (!S_ISDIR(mode) && !S_ISLNK(mode)) {
276 		fd = open(path, O_RDONLY|O_NONBLOCK);
277 		if (fd == -1 && (errno == ENFILE || errno == EMFILE))
278 			err(1, "open");
279 	} else
280 		fd = -1;
281 	if (S_ISLNK(mode))
282 		read_link(msg, path);
283 	return (fd);
284 
285 }
286 
/*
 * Queue one message of msglen bytes, together with fd, on ibuf and flush
 * it straight away. Any failure is fatal.
 */
static void
send_message(struct imsgbuf *ibuf, void *msg, size_t msglen, int fd)
{
	int	rc;

	rc = imsg_compose(ibuf, -1, -1, 0, fd, msg, msglen);
	if (rc != 1)
		err(1, "imsg_compose");

	rc = imsgbuf_flush(ibuf);
	if (rc != 0)
		err(1, "imsgbuf_flush");
}
295 
296 static int
read_message(struct imsgbuf * ibuf,struct imsg * imsg,pid_t from)297 read_message(struct imsgbuf *ibuf, struct imsg *imsg, pid_t from)
298 {
299 	while (1) {
300 		switch (imsg_get(ibuf, imsg)) {
301 		case -1:
302 			err(1, "imsg_get");
303 		case 0:
304 			break;
305 		default:
306 			if ((pid_t)imsg->hdr.pid != from)
307 				errx(1, "PIDs don't match");
308 			return (1);
309 		}
310 
311 		switch (imsgbuf_read(ibuf)) {
312 		case -1:
313 			err(1, "imsgbuf_read");
314 		case 0:
315 			return (0);
316 		}
317 	}
318 }
319 
320 static void
read_link(struct input_msg * msg,const char * path)321 read_link(struct input_msg *msg, const char *path)
322 {
323 	struct stat	 sb;
324 	char		 lpath[PATH_MAX];
325 	char		*copy, *root;
326 	int		 used;
327 	ssize_t		 size;
328 
329 	size = readlink(path, lpath, sizeof lpath - 1);
330 	if (size == -1) {
331 		msg->link_error = errno;
332 		return;
333 	}
334 	lpath[size] = '\0';
335 
336 	if (*lpath == '/')
337 		strlcpy(msg->link_path, lpath, sizeof msg->link_path);
338 	else {
339 		copy = xstrdup(path);
340 
341 		root = dirname(copy);
342 		if (*root == '\0' || strcmp(root, ".") == 0 ||
343 		    strcmp (root, "/") == 0)
344 			strlcpy(msg->link_path, lpath, sizeof msg->link_path);
345 		else {
346 			used = snprintf(msg->link_path, sizeof msg->link_path,
347 			    "%s/%s", root, lpath);
348 			if (used < 0 || (size_t)used >= sizeof msg->link_path) {
349 				msg->link_error = ENAMETOOLONG;
350 				free(copy);
351 				return;
352 			}
353 		}
354 
355 		free(copy);
356 	}
357 
358 	if (!Lflag && stat(path, &sb) == -1)
359 		msg->link_target = errno;
360 }
361 
/*
 * Child process; never returns. Drops privileges when started as root,
 * loads the magic file opened by the parent and then, for every message
 * received on fd, tests the file and sends back an acknowledgement.
 *
 * fd is the child's end of the socket pair, parent is the pid expected in
 * the header of each incoming message, and argc/argv are the file
 * operands (already adjusted past the options).
 */
static __dead void
child(int fd, pid_t parent, int argc, char **argv)
{
	struct passwd		*pw;
	struct magic		*m;
	struct imsgbuf		 ibuf;
	struct imsg		 imsg;
	struct input_msg	 msg;
	struct input_ack	 ack;
	struct input_file	 inf;
	int			 i, idx;
	size_t			 len, width = 0;

	if (pledge("stdio getpw recvfd id", NULL) == -1)
		err(1, "pledge");

	/* When running as root, switch to the FILE_USER user and group. */
	if (geteuid() == 0) {
		pw = getpwnam(FILE_USER);
		if (pw == NULL)
			errx(1, "unknown user %s", FILE_USER);
		if (setgroups(1, &pw->pw_gid) != 0)
			err(1, "setgroups");
		if (setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) != 0)
			err(1, "setresgid");
		if (setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid) != 0)
			err(1, "setresuid");
	}

	/* From here on only stdio and descriptor receipt are needed. */
	if (pledge("stdio recvfd", NULL) == -1)
		err(1, "pledge");

	m = magic_load(magicfp, magicpath, cflag || Wflag);
	if (cflag) {
		magic_dump(m);
		exit(0);
	}

	/*
	 * The label column is as wide as the longest operand plus one
	 * character for the trailing colon.
	 */
	for (i = 0; i < argc; i++) {
		len = strlen(argv[i]) + 1;
		if (len > width)
			width = len;
	}

	if (imsgbuf_init(&ibuf, fd) == -1)
		err(1, "imsgbuf_init");
	imsgbuf_allow_fdpass(&ibuf);
	for (;;) {
		/* Leave the loop when no further message can be read. */
		if (read_message(&ibuf, &imsg, parent) == 0)
			break;
		if (imsg_get_data(&imsg, &msg, sizeof msg) == -1)
			err(1, "bad message");

		/* The index selects an entry of argv, so validate it first. */
		idx = msg.idx;
		if (idx < 0 || idx >= argc)
			errx(1, "index out of range");

		memset(&inf, 0, sizeof inf);
		inf.m = m;
		inf.msg = &msg;

		inf.path = argv[idx];
		inf.fd = imsg_get_fd(&imsg);

		test_file(&inf, width);

		if (inf.fd != -1)
			close(inf.fd);
		imsg_free(&imsg);

		/* Tell the parent this file is done so it can send the next. */
		ack.idx = idx;
		send_message(&ibuf, &ack, sizeof ack, -1);
	}
	exit(0);
}
436 
437 static void *
fill_buffer(int fd,size_t size,size_t * used)438 fill_buffer(int fd, size_t size, size_t *used)
439 {
440 	static void	*buffer;
441 	ssize_t		 got;
442 	size_t		 left;
443 	void		*next;
444 
445 	if (buffer == NULL)
446 		buffer = xmalloc(FILE_READ_SIZE);
447 
448 	next = buffer;
449 	left = size;
450 	while (left != 0) {
451 		got = read(fd, next, left);
452 		if (got == -1) {
453 			if (errno == EINTR)
454 				continue;
455 			return (NULL);
456 		}
457 		if (got == 0)
458 			break;
459 		next = (char *)next + got;
460 		left -= got;
461 	}
462 	*used = size - left;
463 	return (buffer);
464 }
465 
466 static int
load_file(struct input_file * inf)467 load_file(struct input_file *inf)
468 {
469 	size_t	used;
470 
471 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
472 		return (0); /* empty file */
473 	if (inf->msg->sb.st_size == 0 || inf->msg->sb.st_size > FILE_READ_SIZE)
474 		inf->size = FILE_READ_SIZE;
475 	else
476 		inf->size = inf->msg->sb.st_size;
477 
478 	if (!S_ISREG(inf->msg->sb.st_mode))
479 		goto try_read;
480 
481 	inf->base = mmap(NULL, inf->size, PROT_READ, MAP_PRIVATE, inf->fd, 0);
482 	if (inf->base == MAP_FAILED)
483 		goto try_read;
484 	inf->mapped = 1;
485 	return (0);
486 
487 try_read:
488 	inf->base = fill_buffer(inf->fd, inf->size, &used);
489 	if (inf->base == NULL) {
490 		xasprintf(&inf->result, "cannot read '%s' (%s)", inf->path,
491 		    strerror(errno));
492 		return (1);
493 	}
494 	inf->size = used;
495 	return (0);
496 }
497 
498 static int
try_stat(struct input_file * inf)499 try_stat(struct input_file *inf)
500 {
501 	if (inf->msg->error != 0) {
502 		xasprintf(&inf->result, "cannot stat '%s' (%s)", inf->path,
503 		    strerror(inf->msg->error));
504 		return (1);
505 	}
506 	if (sflag || strcmp(inf->path, "-") == 0) {
507 		switch (inf->msg->sb.st_mode & S_IFMT) {
508 		case S_IFIFO:
509 			if (strcmp(inf->path, "-") != 0)
510 				break;
511 		case S_IFBLK:
512 		case S_IFCHR:
513 		case S_IFREG:
514 			return (0);
515 		}
516 	}
517 
518 	if (iflag && (inf->msg->sb.st_mode & S_IFMT) != S_IFREG) {
519 		xasprintf(&inf->result, "application/x-not-regular-file");
520 		return (1);
521 	}
522 
523 	switch (inf->msg->sb.st_mode & S_IFMT) {
524 	case S_IFDIR:
525 		xasprintf(&inf->result, "directory");
526 		return (1);
527 	case S_IFLNK:
528 		if (inf->msg->link_error != 0) {
529 			xasprintf(&inf->result, "unreadable symlink '%s' (%s)",
530 			    inf->path, strerror(inf->msg->link_error));
531 			return (1);
532 		}
533 		if (inf->msg->link_target == ELOOP)
534 			xasprintf(&inf->result, "symbolic link in a loop");
535 		else if (inf->msg->link_target != 0) {
536 			xasprintf(&inf->result, "broken symbolic link to '%s'",
537 			    inf->msg->link_path);
538 		} else {
539 			xasprintf(&inf->result, "symbolic link to '%s'",
540 			    inf->msg->link_path);
541 		}
542 		return (1);
543 	case S_IFSOCK:
544 		xasprintf(&inf->result, "socket");
545 		return (1);
546 	case S_IFBLK:
547 		xasprintf(&inf->result, "block special (%lu/%lu)",
548 		    (long)major(inf->msg->sb.st_rdev),
549 		    (long)minor(inf->msg->sb.st_rdev));
550 		return (1);
551 	case S_IFCHR:
552 		xasprintf(&inf->result, "character special (%lu/%lu)",
553 		    (long)major(inf->msg->sb.st_rdev),
554 		    (long)minor(inf->msg->sb.st_rdev));
555 		return (1);
556 	case S_IFIFO:
557 		xasprintf(&inf->result, "fifo (named pipe)");
558 		return (1);
559 	}
560 	return (0);
561 }
562 
563 static int
try_empty(struct input_file * inf)564 try_empty(struct input_file *inf)
565 {
566 	if (inf->size != 0)
567 		return (0);
568 
569 	if (iflag)
570 		xasprintf(&inf->result, "application/x-empty");
571 	else
572 		xasprintf(&inf->result, "empty");
573 	return (1);
574 }
575 
576 static int
try_access(struct input_file * inf)577 try_access(struct input_file *inf)
578 {
579 	char tmp[256] = "";
580 
581 	if (inf->msg->sb.st_size == 0 && S_ISREG(inf->msg->sb.st_mode))
582 		return (0); /* empty file */
583 	if (inf->fd != -1)
584 		return (0);
585 
586 	if (inf->msg->sb.st_mode & (S_IWUSR|S_IWGRP|S_IWOTH))
587 		strlcat(tmp, "writable, ", sizeof tmp);
588 	if (inf->msg->sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))
589 		strlcat(tmp, "executable, ", sizeof tmp);
590 	if (S_ISREG(inf->msg->sb.st_mode))
591 		strlcat(tmp, "regular file, ", sizeof tmp);
592 	strlcat(tmp, "no read permission", sizeof tmp);
593 
594 	inf->result = xstrdup(tmp);
595 	return (1);
596 }
597 
598 static int
try_text(struct input_file * inf)599 try_text(struct input_file *inf)
600 {
601 	const char	*type, *s;
602 	int		 flags;
603 
604 	flags = MAGIC_TEST_TEXT;
605 	if (iflag)
606 		flags |= MAGIC_TEST_MIME;
607 
608 	type = text_get_type(inf->base, inf->size);
609 	if (type == NULL)
610 		return (0);
611 
612 	s = magic_test(inf->m, inf->base, inf->size, flags);
613 	if (s != NULL) {
614 		inf->result = xstrdup(s);
615 		return (1);
616 	}
617 
618 	s = text_try_words(inf->base, inf->size, flags);
619 	if (s != NULL) {
620 		if (iflag)
621 			inf->result = xstrdup(s);
622 		else
623 			xasprintf(&inf->result, "%s %s text", type, s);
624 		return (1);
625 	}
626 
627 	if (iflag)
628 		inf->result = xstrdup("text/plain");
629 	else
630 		xasprintf(&inf->result, "%s text", type);
631 	return (1);
632 }
633 
634 static int
try_magic(struct input_file * inf)635 try_magic(struct input_file *inf)
636 {
637 	const char	*s;
638 	int		 flags;
639 
640 	flags = 0;
641 	if (iflag)
642 		flags |= MAGIC_TEST_MIME;
643 
644 	s = magic_test(inf->m, inf->base, inf->size, flags);
645 	if (s != NULL) {
646 		inf->result = xstrdup(s);
647 		return (1);
648 	}
649 	return (0);
650 }
651 
652 static int
try_unknown(struct input_file * inf)653 try_unknown(struct input_file *inf)
654 {
655 	if (iflag)
656 		xasprintf(&inf->result, "application/octet-stream");
657 	else
658 		xasprintf(&inf->result, "data");
659 	return (1);
660 }
661 
662 static void
test_file(struct input_file * inf,size_t width)663 test_file(struct input_file *inf, size_t width)
664 {
665 	char	*label;
666 	int	 stop;
667 
668 	stop = 0;
669 	if (!stop)
670 		stop = try_stat(inf);
671 	if (!stop)
672 		stop = try_access(inf);
673 	if (!stop)
674 		stop = load_file(inf);
675 	if (!stop)
676 		stop = try_empty(inf);
677 	if (!stop)
678 		stop = try_magic(inf);
679 	if (!stop)
680 		stop = try_text(inf);
681 	if (!stop)
682 		stop = try_unknown(inf);
683 
684 	if (bflag)
685 		printf("%s\n", inf->result);
686 	else {
687 		if (strcmp(inf->path, "-") == 0)
688 			xasprintf(&label, "/dev/stdin:");
689 		else
690 			xasprintf(&label, "%s:", inf->path);
691 		printf("%-*s %s\n", (int)width, label, inf->result);
692 		free(label);
693 	}
694 	free(inf->result);
695 
696 	if (inf->mapped && inf->base != NULL)
697 		munmap(inf->base, inf->size);
698 }
699