xref: /openbsd/usr.bin/less/filename.c (revision e5dd7070)
1 /*
2  * Copyright (C) 1984-2012  Mark Nudelman
3  * Modified for use with illumos by Garrett D'Amore.
4  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
5  *
6  * You may distribute under the terms of either the GNU General Public
7  * License or the Less License, as specified in the README file.
8  *
9  * For more information, see the README file.
10  */
11 
12 /*
13  * Routines to mess around with filenames (and files).
14  * Much of this is very OS dependent.
15  *
16  * Modified for illumos/POSIX -- it uses native glob(3C) rather than
17  * popen to a shell to perform the expansion.
18  */
19 
20 #include <sys/stat.h>
21 
22 #include <glob.h>
23 #include <stdarg.h>
24 
25 #include "less.h"
26 
27 extern int force_open;
28 extern int secure;
29 extern int use_lessopen;
30 extern int ctldisp;
31 extern int utf_mode;
32 extern IFILE curr_ifile;
33 extern IFILE old_ifile;
34 extern char openquote;
35 extern char closequote;
36 
37 /*
38  * Remove quotes around a filename.
39  */
40 char *
41 shell_unquote(char *str)
42 {
43 	char *name;
44 	char *p;
45 
46 	name = p = ecalloc(strlen(str)+1, sizeof (char));
47 	if (*str == openquote) {
48 		str++;
49 		while (*str != '\0') {
50 			if (*str == closequote) {
51 				if (str[1] != closequote)
52 					break;
53 				str++;
54 			}
55 			*p++ = *str++;
56 		}
57 	} else {
58 		char *esc = get_meta_escape();
59 		int esclen = strlen(esc);
60 		while (*str != '\0') {
61 			if (esclen > 0 && strncmp(str, esc, esclen) == 0)
62 				str += esclen;
63 			*p++ = *str++;
64 		}
65 	}
66 	*p = '\0';
67 	return (name);
68 }
69 
/*
 * Return the string used to escape shell metacharacters: the value of
 * LESSMETAESCAPE as reported by lgetenv(), or a single backslash when
 * lgetenv() returns NULL.
 */
char *
get_meta_escape(void)
{
	char *esc = lgetenv("LESSMETAESCAPE");

	return ((esc != NULL) ? esc : "\\");
}
83 
84 /*
85  * Get the characters which the shell considers to be "metacharacters".
86  */
87 static char *
88 metachars(void)
89 {
90 	static char *mchars = NULL;
91 
92 	if (mchars == NULL) {
93 		mchars = lgetenv("LESSMETACHARS");
94 		if (mchars == NULL)
95 			mchars = DEF_METACHARS;
96 	}
97 	return (mchars);
98 }
99 
/*
 * Return non-zero if c appears in the metacharacter set from metachars().
 */
static int
metachar(char c)
{
	const char *hit = strchr(metachars(), c);

	return (hit != NULL);
}
108 
109 /*
110  * Insert a backslash before each metacharacter in a string.
111  */
112 char *
113 shell_quote(const char *s)
114 {
115 	const char *p;
116 	char *r;
117 	char *newstr;
118 	int len;
119 	char *esc = get_meta_escape();
120 	int esclen = strlen(esc);
121 	int use_quotes = 0;
122 	int have_quotes = 0;
123 
124 	/*
125 	 * Determine how big a string we need to allocate.
126 	 */
127 	len = 1; /* Trailing null byte */
128 	for (p = s; *p != '\0'; p++) {
129 		len++;
130 		if (*p == openquote || *p == closequote)
131 			have_quotes = 1;
132 		if (metachar(*p)) {
133 			if (esclen == 0) {
134 				/*
135 				 * We've got a metachar, but this shell
136 				 * doesn't support escape chars.  Use quotes.
137 				 */
138 				use_quotes = 1;
139 			} else {
140 				/*
141 				 * Allow space for the escape char.
142 				 */
143 				len += esclen;
144 			}
145 		}
146 	}
147 	/*
148 	 * Allocate and construct the new string.
149 	 */
150 	if (use_quotes) {
151 		/* We can't quote a string that contains quotes. */
152 		if (have_quotes)
153 			return (NULL);
154 		newstr  = easprintf("%c%s%c", openquote, s, closequote);
155 	} else {
156 		newstr = r = ecalloc(len, sizeof (char));
157 		while (*s != '\0') {
158 			if (metachar(*s)) {
159 				/*
160 				 * Add the escape char.
161 				 */
162 				(void) strlcpy(r, esc, newstr + len - p);
163 				r += esclen;
164 			}
165 			*r++ = *s++;
166 		}
167 		*r = '\0';
168 	}
169 	return (newstr);
170 }
171 
/*
 * Build "dirname/filename" and return it if that file can be opened
 * for reading.  Returns NULL when dirname is NULL or empty, or when the
 * open fails.  The existence check is done on the unquoted form of the
 * path, but the string returned is the path as constructed.
 */
static char *
dirfile(const char *dirname, const char *filename)
{
	char *path;
	char *unquoted;
	int fd;

	if (dirname == NULL || *dirname == '\0')
		return (NULL);

	path = easprintf("%s/%s", dirname, filename);

	/* Probe the file by opening it. */
	unquoted = shell_unquote(path);
	fd = open(unquoted, O_RDONLY);
	free(unquoted);

	if (fd == -1) {
		free(path);
		return (NULL);
	}
	(void) close(fd);
	return (path);
}
203 
/*
 * Look for filename in the directory named by HOME.
 * Returns whatever dirfile() returns for that directory.
 */
char *
homefile(char *filename)
{
	char *home = lgetenv("HOME");

	return (dirfile(home, filename));
}
212 
213 /*
214  * Expand a string, substituting any "%" with the current filename,
215  * and any "#" with the previous filename.
216  * But a string of N "%"s is just replaced with N-1 "%"s.
217  * Likewise for a string of N "#"s.
218  * {{ This is a lot of work just to support % and #. }}
219  */
220 char *
221 fexpand(char *s)
222 {
223 	char *fr, *to;
224 	int n;
225 	char *e;
226 	IFILE ifile;
227 
228 #define	fchar_ifile(c) \
229 	((c) == '%' ? curr_ifile : (c) == '#' ? old_ifile : NULL)
230 
231 	/*
232 	 * Make one pass to see how big a buffer we
233 	 * need to allocate for the expanded string.
234 	 */
235 	n = 0;
236 	for (fr = s; *fr != '\0'; fr++) {
237 		switch (*fr) {
238 		case '%':
239 		case '#':
240 			if (fr > s && fr[-1] == *fr) {
241 				/*
242 				 * Second (or later) char in a string
243 				 * of identical chars.  Treat as normal.
244 				 */
245 				n++;
246 			} else if (fr[1] != *fr) {
247 				/*
248 				 * Single char (not repeated).  Treat specially.
249 				 */
250 				ifile = fchar_ifile(*fr);
251 				if (ifile == NULL)
252 					n++;
253 				else
254 					n += strlen(get_filename(ifile));
255 			}
256 			/*
257 			 * Else it is the first char in a string of
258 			 * identical chars.  Just discard it.
259 			 */
260 			break;
261 		default:
262 			n++;
263 			break;
264 		}
265 	}
266 
267 	e = ecalloc(n+1, sizeof (char));
268 
269 	/*
270 	 * Now copy the string, expanding any "%" or "#".
271 	 */
272 	to = e;
273 	for (fr = s; *fr != '\0'; fr++) {
274 		switch (*fr) {
275 		case '%':
276 		case '#':
277 			if (fr > s && fr[-1] == *fr) {
278 				*to++ = *fr;
279 			} else if (fr[1] != *fr) {
280 				ifile = fchar_ifile(*fr);
281 				if (ifile == NULL) {
282 					*to++ = *fr;
283 				} else {
284 					(void) strlcpy(to, get_filename(ifile),
285 					    e + n + 1 - to);
286 					to += strlen(to);
287 				}
288 			}
289 			break;
290 		default:
291 			*to++ = *fr;
292 			break;
293 		}
294 	}
295 	*to = '\0';
296 	return (e);
297 }
298 
299 /*
300  * Return a blank-separated list of filenames which "complete"
301  * the given string.
302  */
303 char *
304 fcomplete(char *s)
305 {
306 	char *fpat;
307 	char *qs;
308 
309 	if (secure)
310 		return (NULL);
311 	/*
312 	 * Complete the filename "s" by globbing "s*".
313 	 */
314 	fpat =  easprintf("%s*", s);
315 
316 	qs = lglob(fpat);
317 	s = shell_unquote(qs);
318 	if (strcmp(s, fpat) == 0) {
319 		/*
320 		 * The filename didn't expand.
321 		 */
322 		free(qs);
323 		qs = NULL;
324 	}
325 	free(s);
326 	free(fpat);
327 	return (qs);
328 }
329 
330 /*
331  * Try to determine if a file is "binary".
332  * This is just a guess, and we need not try too hard to make it accurate.
333  */
334 int
335 bin_file(int f)
336 {
337 	char data[256];
338 	ssize_t i, n;
339 	wchar_t ch;
340 	int bin_count, len;
341 
342 	if (!seekable(f))
343 		return (0);
344 	if (lseek(f, (off_t)0, SEEK_SET) == (off_t)-1)
345 		return (0);
346 	n = read(f, data, sizeof (data));
347 	bin_count = 0;
348 	for (i = 0; i < n; i += len) {
349 		len = mbtowc(&ch, data + i, n - i);
350 		if (len <= 0) {
351 			bin_count++;
352 			len = 1;
353 		} else if (iswprint(ch) == 0 && iswspace(ch) == 0 &&
354 		    data[i] != '\b' &&
355 		    (ctldisp != OPT_ONPLUS || data[i] != ESC))
356 			bin_count++;
357 	}
358 	/*
359 	 * Call it a binary file if there are more than 5 binary characters
360 	 * in the first 256 bytes of the file.
361 	 */
362 	return (bin_count > 5);
363 }
364 
/*
 * Read one line from a stream.
 * Characters are collected up to (not including) the first newline or
 * EOF and returned as a NUL-terminated string in a buffer obtained from
 * ecalloc().  The buffer starts at 100 bytes and doubles when full.
 */
static char *
readfd(FILE *fd)
{
	int cap = 100;
	int ch;
	char *buf = ecalloc(cap, sizeof (char));
	char *end = buf;

	while ((ch = getc(fd)) != '\n' && ch != EOF) {
		if (end >= buf + cap - 1) {
			char *bigger;

			/*
			 * No room left for this character plus the
			 * terminator: move to a buffer twice the size.
			 */
			cap *= 2;
			*end = '\0';
			bigger = ecalloc(cap, sizeof (char));
			strlcpy(bigger, buf, cap);
			free(buf);
			buf = bigger;
			end = buf + strlen(buf);
		}
		*end++ = (char)ch;
	}
	*end = '\0';
	return (buf);
}
404 
/*
 * Run a command and return a stream connected to its standard output.
 * If SHELL is set and non-empty and the command can be quoted by
 * shell_quote(), the command run is "$SHELL -c <quoted cmd>"; in every
 * other case cmd is handed to popen() as is.
 * Returns the popen() result.
 */
static FILE *
shellcmd(char *cmd)
{
	char *shell = lgetenv("SHELL");
	char *quoted;
	char *full;
	FILE *fp;

	if (shell == NULL || *shell == '\0')
		return (popen(cmd, "r"));

	/* Escape any metacharacters in the command. */
	quoted = shell_quote(cmd);
	if (quoted == NULL)
		return (popen(cmd, "r"));

	full = easprintf("%s -c %s", shell, quoted);
	free(quoted);
	fp = popen(full, "r");
	free(full);
	return (fp);
}
443 
444 /*
445  * Expand a filename, doing any system-specific metacharacter substitutions.
446  */
447 char *
448 lglob(char *filename)
449 {
450 	char *gfilename;
451 	char *ofilename;
452 	glob_t list;
453 	int i;
454 	int length;
455 	char *p;
456 	char *qfilename;
457 
458 	ofilename = fexpand(filename);
459 	if (secure)
460 		return (ofilename);
461 	filename = shell_unquote(ofilename);
462 
463 	/*
464 	 * The globbing function returns a list of names.
465 	 */
466 
467 #ifndef	GLOB_TILDE
468 #define	GLOB_TILDE	0
469 #endif
470 #ifndef	GLOB_LIMIT
471 #define	GLOB_LIMIT	0
472 #endif
473 	if (glob(filename, GLOB_TILDE | GLOB_LIMIT, NULL, &list) != 0) {
474 		free(filename);
475 		return (ofilename);
476 	}
477 	length = 1; /* Room for trailing null byte */
478 	for (i = 0; i < list.gl_pathc; i++) {
479 		p = list.gl_pathv[i];
480 		qfilename = shell_quote(p);
481 		if (qfilename != NULL) {
482 			length += strlen(qfilename) + 1;
483 			free(qfilename);
484 		}
485 	}
486 	gfilename = ecalloc(length, sizeof (char));
487 	for (i = 0; i < list.gl_pathc; i++) {
488 		p = list.gl_pathv[i];
489 		qfilename = shell_quote(p);
490 		if (qfilename != NULL) {
491 			if (i != 0) {
492 				(void) strlcat(gfilename, " ", length);
493 			}
494 			(void) strlcat(gfilename, qfilename, length);
495 			free(qfilename);
496 		}
497 	}
498 	globfree(&list);
499 	free(filename);
500 	free(ofilename);
501 	return (gfilename);
502 }
503 
/*
 * Expand LESSOPEN or LESSCLOSE.
 *
 * fmt is copied with each "%s" replaced, in order, by the next string
 * from the NULL-terminated variable argument list (at most two strings
 * are consumed) and each "%%" replaced by a single "%".
 *
 * Returns a newly allocated string on success.  Returns NULL if fmt
 * contains a "%" followed by anything other than "s" or "%", or more
 * "%s" directives than strings were supplied.
 */
static char *
expand_pct_s(const char *fmt, ...)
{
	int		n;
	int		len;
	char		*r, *d;
	const char	*f[3];		/* max expansions + 1 for NULL */
	va_list		ap;

	/*
	 * Collect the string arguments up front; the va_list is not
	 * touched again after this loop.
	 */
	va_start(ap, fmt);
	for (n = 0; n < ((sizeof (f)/sizeof (f[0])) - 1); n++) {
		f[n] = (const char *)va_arg(ap, const char *);
		if (f[n] == NULL) {
			break;
		}
	}
	va_end(ap);
	f[n] = NULL;	/* terminate list */

	len = strlen(fmt) + 1;
	for (n = 0; f[n] != NULL; n++) {
		len += strlen(f[n]);	/* technically could -2 for "%s" */
	}
	r = ecalloc(len, sizeof (char));

	for (n = 0, d = r; *fmt != 0; ) {
		if (*fmt != '%') {
			*d++ = *fmt++;
			continue;
		}
		fmt++;
		/* Permit embedded "%%" */
		switch (*fmt) {
		case '%':
			*d++ = '%';
			fmt++;
			break;
		case 's':
			if (f[n] == NULL) {
				/* More "%s" than arguments. */
				free(r);
				return (NULL);
			}
			(void) strlcpy(d, f[n++], r + len - d);
			fmt++;
			d += strlen(d);
			break;
		default:
			/* Unsupported directive (or "%" at end of fmt). */
			free(r);
			return (NULL);
		}
	}
	*d = '\0';
	return (r);
}
564 
565 /*
566  * See if we should open a "replacement file"
567  * instead of the file we're about to open.
568  */
569 char *
570 open_altfile(char *filename, int *pf, void **pfd)
571 {
572 	char *lessopen;
573 	char *cmd;
574 	FILE *fd;
575 	int returnfd = 0;
576 
577 	if (!use_lessopen || secure)
578 		return (NULL);
579 	ch_ungetchar(-1);
580 	if ((lessopen = lgetenv("LESSOPEN")) == NULL)
581 		return (NULL);
582 	while (*lessopen == '|') {
583 		/*
584 		 * If LESSOPEN starts with a |, it indicates
585 		 * a "pipe preprocessor".
586 		 */
587 		lessopen++;
588 		returnfd++;
589 	}
590 	if (*lessopen == '-') {
591 		/*
592 		 * Lessopen preprocessor will accept "-" as a filename.
593 		 */
594 		lessopen++;
595 	} else {
596 		if (strcmp(filename, "-") == 0)
597 			return (NULL);
598 	}
599 
600 	if ((cmd = expand_pct_s(lessopen, filename, NULL)) == NULL) {
601 		error("Invalid LESSOPEN variable", NULL);
602 		return (NULL);
603 	}
604 	fd = shellcmd(cmd);
605 	free(cmd);
606 	if (fd == NULL) {
607 		/*
608 		 * Cannot create the pipe.
609 		 */
610 		return (NULL);
611 	}
612 	if (returnfd) {
613 		int f;
614 		char c;
615 
616 		/*
617 		 * Read one char to see if the pipe will produce any data.
618 		 * If it does, push the char back on the pipe.
619 		 */
620 		f = fileno(fd);
621 		if (read(f, &c, 1) != 1) {
622 			/*
623 			 * Pipe is empty.
624 			 * If more than 1 pipe char was specified,
625 			 * the exit status tells whether the file itself
626 			 * is empty, or if there is no alt file.
627 			 * If only one pipe char, just assume no alt file.
628 			 */
629 			int status = pclose(fd);
630 			if (returnfd > 1 && status == 0) {
631 				*pfd = NULL;
632 				*pf = -1;
633 				return (estrdup(FAKE_EMPTYFILE));
634 			}
635 			return (NULL);
636 		}
637 		ch_ungetchar(c);
638 		*pfd = (void *) fd;
639 		*pf = f;
640 		return (estrdup("-"));
641 	}
642 	cmd = readfd(fd);
643 	pclose(fd);
644 	if (*cmd == '\0')
645 		/*
646 		 * Pipe is empty.  This means there is no alt file.
647 		 */
648 		return (NULL);
649 	return (cmd);
650 }
651 
652 /*
653  * Close a replacement file.
654  */
655 void
656 close_altfile(char *altfilename, char *filename, void *pipefd)
657 {
658 	char *lessclose;
659 	FILE *fd;
660 	char *cmd;
661 
662 	if (secure)
663 		return;
664 	if (pipefd != NULL) {
665 		pclose((FILE *)pipefd);
666 	}
667 	if ((lessclose = lgetenv("LESSCLOSE")) == NULL)
668 		return;
669 	cmd = expand_pct_s(lessclose, filename, altfilename, NULL);
670 	if (cmd == NULL) {
671 		error("Invalid LESSCLOSE variable", NULL);
672 		return;
673 	}
674 	fd = shellcmd(cmd);
675 	free(cmd);
676 	if (fd != NULL)
677 		(void) pclose(fd);
678 }
679 
/*
 * Return 1 if the (possibly quoted) filename names a directory,
 * 0 otherwise, including when stat() fails.
 */
int
is_dir(char *filename)
{
	struct stat sb;
	char *path = shell_unquote(filename);
	int found = 0;

	if (stat(path, &sb) >= 0 && S_ISDIR(sb.st_mode))
		found = 1;
	free(path);
	return (found);
}
697 
698 /*
699  * Returns NULL if the file can be opened and
700  * is an ordinary file, otherwise an error message
701  * (if it cannot be opened or is a directory, etc.)
702  */
703 char *
704 bad_file(char *filename)
705 {
706 	char *m = NULL;
707 
708 	filename = shell_unquote(filename);
709 	if (!force_open && is_dir(filename)) {
710 		m = easprintf("%s is a directory", filename);
711 	} else {
712 		int r;
713 		struct stat statbuf;
714 
715 		r = stat(filename, &statbuf);
716 		if (r == -1) {
717 			m = errno_message(filename);
718 		} else if (force_open) {
719 			m = NULL;
720 		} else if (!S_ISREG(statbuf.st_mode)) {
721 			m = easprintf("%s is not a regular file (use -f to "
722 			    "see it)", filename);
723 		}
724 	}
725 	free(filename);
726 	return (m);
727 }
728 
/*
 * Return the size of the file open on descriptor f as reported by
 * fstat(), or -1 if fstat() fails.
 */
off_t
filesize(int f)
{
	struct stat sb;

	if (fstat(f, &sb) < 0)
		return (-1);
	return (sb.st_size);
}
741 
/*
 * Return a pointer to the last component of a pathname: the text after
 * the final '/', or the whole name if it contains no '/'.
 * The result points into the caller's string.
 */
char *
last_component(char *name)
{
	char *p = name + strlen(name);

	/* Walk backwards until the character before p is a slash. */
	while (p > name) {
		if (p[-1] == '/')
			return (p);
		p--;
	}
	return (name);
}
757