1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file file_io.c
4 /// \brief File opening, unlinking, and closing
5 //
6 // Author: Lasse Collin
7 //
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "private.h"
14
15 #include <fcntl.h>
16
17 #ifdef TUKLIB_DOSLIKE
18 # include <io.h>
19 #else
20 # include <poll.h>
21 static bool warn_fchown;
22 #endif
23
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 # include <sys/time.h>
26 #elif defined(HAVE__FUTIME)
27 # include <sys/utime.h>
28 #elif defined(HAVE_UTIME)
29 # include <utime.h>
30 #endif
31
32 #ifdef HAVE_CAPSICUM
33 # ifdef HAVE_SYS_CAPSICUM_H
34 # include <sys/capsicum.h>
35 # else
36 # include <sys/capability.h>
37 # endif
38 #endif
39
40 #include "tuklib_open_stdxxx.h"
41
42 #ifndef O_BINARY
43 # define O_BINARY 0
44 #endif
45
46 #ifndef O_NOCTTY
47 # define O_NOCTTY 0
48 #endif
49
// IS_EAGAIN_OR_EWOULDBLOCK(e) is true when the errno value e equals EAGAIN
// or EWOULDBLOCK. When the two constants have the same value, only one
// comparison is emitted; comparing against both would trigger a warning
// from gcc -Wlogical-op.
#if EAGAIN == EWOULDBLOCK
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
#else
#	define IS_EAGAIN_OR_EWOULDBLOCK(e) \
		((e) == EAGAIN || (e) == EWOULDBLOCK)
#endif
57
58
/// Return values of io_wait().
typedef enum {
	IO_WAIT_MORE,    // Reading or writing is possible.
	IO_WAIT_ERROR,   // Error or user_abort
	IO_WAIT_TIMEOUT, // poll() timed out
} io_wait_ret;
64
65
/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false.
static bool try_sparse = true;

#ifdef ENABLE_SANDBOX
/// True if the conditions for sandboxing (described in main()) have been met.
/// This is set by io_allow_sandbox() and checked by io_sandbox_enter().
static bool sandbox_allowed = false;
#endif

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input. This is used by io_open_src()
/// and io_close_src().
static int stdin_flags;

/// True when O_NONBLOCK was successfully added to standard input and
/// io_close_src() therefore has to put stdin_flags back.
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;

/// True when the flags of standard output were modified (O_NONBLOCK set
/// and/or O_APPEND cleared) and io_close_dest() has to restore
/// stdout_flags.
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. Index 0 is the end polled by
/// io_wait(); index 1 is written by io_write_to_user_abort_pipe().
static int user_abort_pipe[2];
#endif
89
90
91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
92
93
94 extern void
io_init(void)95 io_init(void)
96 {
97 // Make sure that stdin, stdout, and stderr are connected to
98 // a valid file descriptor. Exit immediately with exit code ERROR
99 // if we cannot make the file descriptors valid. Maybe we should
100 // print an error message, but our stderr could be screwed anyway.
101 tuklib_open_stdxxx(E_ERROR);
102
103 #ifndef TUKLIB_DOSLIKE
104 // If fchown() fails setting the owner, we warn about it only if
105 // we are root.
106 warn_fchown = geteuid() == 0;
107
108 // Create a pipe for the self-pipe trick.
109 if (pipe(user_abort_pipe))
110 message_fatal(_("Error creating a pipe: %s"),
111 strerror(errno));
112
113 // Make both ends of the pipe non-blocking.
114 for (unsigned i = 0; i < 2; ++i) {
115 int flags = fcntl(user_abort_pipe[i], F_GETFL);
116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
117 flags | O_NONBLOCK) == -1)
118 message_fatal(_("Error creating a pipe: %s"),
119 strerror(errno));
120 }
121 #endif
122
123 #ifdef __DJGPP__
124 // Avoid doing useless things when statting files.
125 // This isn't important but doesn't hurt.
126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
127 #endif
128
129 return;
130 }
131
132
133 #ifndef TUKLIB_DOSLIKE
134 extern void
io_write_to_user_abort_pipe(void)135 io_write_to_user_abort_pipe(void)
136 {
137 // If the write() fails, it's probably due to the pipe being full.
138 // Failing in that case is fine. If the reason is something else,
139 // there's not much we can do since this is called in a signal
140 // handler. So ignore the errors and try to avoid warnings with
141 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
142 uint8_t b = '\0';
143 const int ret = write(user_abort_pipe[1], &b, 1);
144 (void)ret;
145 return;
146 }
147 #endif
148
149
150 extern void
io_no_sparse(void)151 io_no_sparse(void)
152 {
153 try_sparse = false;
154 return;
155 }
156
157
158 #ifdef ENABLE_SANDBOX
159 extern void
io_allow_sandbox(void)160 io_allow_sandbox(void)
161 {
162 sandbox_allowed = true;
163 return;
164 }
165
166
167 /// Enables operating-system-specific sandbox if it is possible.
168 /// src_fd is the file descriptor of the input file.
169 static void
io_sandbox_enter(int src_fd)170 io_sandbox_enter(int src_fd)
171 {
172 if (!sandbox_allowed) {
173 // This message is more often annoying than useful so
174 // it's commented out. It can be useful when developing
175 // the sandboxing code.
176 //message(V_DEBUG, _("Sandbox is disabled due "
177 // "to incompatible command line arguments"));
178 return;
179 }
180
181 const char dummy_str[] = "x";
182
183 // Try to ensure that both libc and xz locale files have been
184 // loaded when NLS is enabled.
185 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL));
186
187 // Try to ensure that iconv data files needed for handling multibyte
188 // characters have been loaded. This is needed at least with glibc.
189 tuklib_mbstr_width(dummy_str, NULL);
190
191 #ifdef HAVE_CAPSICUM
192 // Capsicum needs FreeBSD 10.0 or later.
193 cap_rights_t rights;
194
195 if (cap_rights_limit(src_fd, cap_rights_init(&rights,
196 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)))
197 goto error;
198
199 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
200 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP,
201 CAP_WRITE, CAP_SEEK)))
202 goto error;
203
204 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights,
205 CAP_EVENT)))
206 goto error;
207
208 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights,
209 CAP_WRITE)))
210 goto error;
211
212 if (cap_enter())
213 goto error;
214
215 #else
216 # error ENABLE_SANDBOX is defined but no sandboxing method was found.
217 #endif
218
219 // This message is annoying in xz -lvv.
220 //message(V_DEBUG, _("Sandbox was successfully enabled"));
221 return;
222
223 error:
224 message(V_DEBUG, _("Failed to enable the sandbox"));
225 }
226 #endif // ENABLE_SANDBOX
227
228
229 #ifndef TUKLIB_DOSLIKE
230 /// \brief Waits for input or output to become available or for a signal
231 ///
232 /// This uses the self-pipe trick to avoid a race condition that can occur
233 /// if a signal is caught after user_abort has been checked but before e.g.
234 /// read() has been called. In that situation read() could block unless
235 /// non-blocking I/O is used. With non-blocking I/O something like select()
236 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
237 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
238 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
239 /// old and very portable.
240 static io_wait_ret
io_wait(file_pair * pair,int timeout,bool is_reading)241 io_wait(file_pair *pair, int timeout, bool is_reading)
242 {
243 struct pollfd pfd[2];
244
245 if (is_reading) {
246 pfd[0].fd = pair->src_fd;
247 pfd[0].events = POLLIN;
248 } else {
249 pfd[0].fd = pair->dest_fd;
250 pfd[0].events = POLLOUT;
251 }
252
253 pfd[1].fd = user_abort_pipe[0];
254 pfd[1].events = POLLIN;
255
256 while (true) {
257 const int ret = poll(pfd, 2, timeout);
258
259 if (user_abort)
260 return IO_WAIT_ERROR;
261
262 if (ret == -1) {
263 if (errno == EINTR || errno == EAGAIN)
264 continue;
265
266 message_error(_("%s: poll() failed: %s"),
267 is_reading ? pair->src_name
268 : pair->dest_name,
269 strerror(errno));
270 return IO_WAIT_ERROR;
271 }
272
273 if (ret == 0)
274 return IO_WAIT_TIMEOUT;
275
276 if (pfd[0].revents != 0)
277 return IO_WAIT_MORE;
278 }
279 }
280 #endif
281
282
283 /// \brief Unlink a file
284 ///
285 /// This tries to verify that the file being unlinked really is the file that
286 /// we want to unlink by verifying device and inode numbers. There's still
287 /// a small unavoidable race, but this is much better than nothing (the file
288 /// could have been moved/replaced even hours earlier).
289 static void
io_unlink(const char * name,const struct stat * known_st)290 io_unlink(const char *name, const struct stat *known_st)
291 {
292 #if defined(TUKLIB_DOSLIKE)
293 // On DOS-like systems, st_ino is meaningless, so don't bother
294 // testing it. Just silence a compiler warning.
295 (void)known_st;
296 #else
297 struct stat new_st;
298
299 // If --force was used, use stat() instead of lstat(). This way
300 // (de)compressing symlinks works correctly. However, it also means
301 // that xz cannot detect if a regular file foo is renamed to bar
302 // and then a symlink foo -> bar is created. Because of stat()
303 // instead of lstat(), xz will think that foo hasn't been replaced
304 // with another file. Thus, xz will remove foo even though it no
305 // longer is the same file that xz used when it started compressing.
306 // Probably it's not too bad though, so this doesn't need a more
307 // complex fix.
308 const int stat_ret = opt_force
309 ? stat(name, &new_st) : lstat(name, &new_st);
310
311 if (stat_ret
312 # ifdef __VMS
313 // st_ino is an array, and we don't want to
314 // compare st_dev at all.
315 || memcmp(&new_st.st_ino, &known_st->st_ino,
316 sizeof(new_st.st_ino)) != 0
317 # else
318 // Typical POSIX-like system
319 || new_st.st_dev != known_st->st_dev
320 || new_st.st_ino != known_st->st_ino
321 # endif
322 )
323 // TRANSLATORS: When compression or decompression finishes,
324 // and xz is going to remove the source file, xz first checks
325 // if the source file still exists, and if it does, does its
326 // device and inode numbers match what xz saw when it opened
327 // the source file. If these checks fail, this message is
328 // shown, %s being the filename, and the file is not deleted.
329 // The check for device and inode numbers is there, because
330 // it is possible that the user has put a new file in place
331 // of the original file, and in that case it obviously
332 // shouldn't be removed.
333 message_error(_("%s: File seems to have been moved, "
334 "not removing"), name);
335 else
336 #endif
337 // There's a race condition between lstat() and unlink()
338 // but at least we have tried to avoid removing wrong file.
339 if (unlink(name))
340 message_error(_("%s: Cannot remove: %s"),
341 name, strerror(errno));
342
343 return;
344 }
345
346
347 /// \brief Copies owner/group and permissions
348 ///
349 /// \todo ACL and EA support
350 ///
351 static void
io_copy_attrs(const file_pair * pair)352 io_copy_attrs(const file_pair *pair)
353 {
354 // Skip chown and chmod on Windows.
355 #ifndef TUKLIB_DOSLIKE
356 // This function is more tricky than you may think at first.
357 // Blindly copying permissions may permit users to access the
358 // destination file who didn't have permission to access the
359 // source file.
360
361 // Try changing the owner of the file. If we aren't root or the owner
362 // isn't already us, fchown() probably doesn't succeed. We warn
363 // about failing fchown() only if we are root.
364 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
365 && warn_fchown)
366 message_warning(_("%s: Cannot set the file owner: %s"),
367 pair->dest_name, strerror(errno));
368
369 mode_t mode;
370
371 if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) {
372 message_warning(_("%s: Cannot set the file group: %s"),
373 pair->dest_name, strerror(errno));
374 // We can still safely copy some additional permissions:
375 // `group' must be at least as strict as `other' and
376 // also vice versa.
377 //
378 // NOTE: After this, the owner of the source file may
379 // get additional permissions. This shouldn't be too bad,
380 // because the owner would have had permission to chmod
381 // the original file anyway.
382 mode = ((pair->src_st.st_mode & 0070) >> 3)
383 & (pair->src_st.st_mode & 0007);
384 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
385 } else {
386 // Drop the setuid, setgid, and sticky bits.
387 mode = pair->src_st.st_mode & 0777;
388 }
389
390 if (fchmod(pair->dest_fd, mode))
391 message_warning(_("%s: Cannot set the file permissions: %s"),
392 pair->dest_name, strerror(errno));
393 #endif
394
395 // Copy the timestamps. We have several possible ways to do this, of
396 // which some are better in both security and precision.
397 //
398 // First, get the nanosecond part of the timestamps. As of writing,
399 // it's not standardized by POSIX, and there are several names for
400 // the same thing in struct stat.
401 long atime_nsec;
402 long mtime_nsec;
403
404 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
405 // GNU and Solaris
406 atime_nsec = pair->src_st.st_atim.tv_nsec;
407 mtime_nsec = pair->src_st.st_mtim.tv_nsec;
408
409 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
410 // BSD
411 atime_nsec = pair->src_st.st_atimespec.tv_nsec;
412 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
413
414 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
415 // GNU and BSD without extensions
416 atime_nsec = pair->src_st.st_atimensec;
417 mtime_nsec = pair->src_st.st_mtimensec;
418
419 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
420 // Tru64
421 atime_nsec = pair->src_st.st_uatime * 1000;
422 mtime_nsec = pair->src_st.st_umtime * 1000;
423
424 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
425 // UnixWare
426 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
427 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
428
429 # else
430 // Safe fallback
431 atime_nsec = 0;
432 mtime_nsec = 0;
433 # endif
434
435 // Construct a structure to hold the timestamps and call appropriate
436 // function to set the timestamps.
437 #if defined(HAVE_FUTIMENS)
438 // Use nanosecond precision.
439 struct timespec tv[2];
440 tv[0].tv_sec = pair->src_st.st_atime;
441 tv[0].tv_nsec = atime_nsec;
442 tv[1].tv_sec = pair->src_st.st_mtime;
443 tv[1].tv_nsec = mtime_nsec;
444
445 (void)futimens(pair->dest_fd, tv);
446
447 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
448 // Use microsecond precision.
449 struct timeval tv[2];
450 tv[0].tv_sec = pair->src_st.st_atime;
451 tv[0].tv_usec = atime_nsec / 1000;
452 tv[1].tv_sec = pair->src_st.st_mtime;
453 tv[1].tv_usec = mtime_nsec / 1000;
454
455 # if defined(HAVE_FUTIMES)
456 (void)futimes(pair->dest_fd, tv);
457 # elif defined(HAVE_FUTIMESAT)
458 (void)futimesat(pair->dest_fd, NULL, tv);
459 # else
460 // Argh, no function to use a file descriptor to set the timestamp.
461 (void)utimes(pair->dest_name, tv);
462 # endif
463
464 #elif defined(HAVE__FUTIME)
465 // Use one-second precision with Windows-specific _futime().
466 // We could use utime() too except that for some reason the
467 // timestamp will get reset at close(). With _futime() it works.
468 // This struct cannot be const as _futime() takes a non-const pointer.
469 struct _utimbuf buf = {
470 .actime = pair->src_st.st_atime,
471 .modtime = pair->src_st.st_mtime,
472 };
473
474 // Avoid warnings.
475 (void)atime_nsec;
476 (void)mtime_nsec;
477
478 (void)_futime(pair->dest_fd, &buf);
479
480 #elif defined(HAVE_UTIME)
481 // Use one-second precision. utime() doesn't support using file
482 // descriptor either. Some systems have broken utime() prototype
483 // so don't make this const.
484 struct utimbuf buf = {
485 .actime = pair->src_st.st_atime,
486 .modtime = pair->src_st.st_mtime,
487 };
488
489 // Avoid warnings.
490 (void)atime_nsec;
491 (void)mtime_nsec;
492
493 (void)utime(pair->dest_name, &buf);
494 #endif
495
496 return;
497 }
498
499
500 /// Opens the source file. Returns false on success, true on error.
501 static bool
io_open_src_real(file_pair * pair)502 io_open_src_real(file_pair *pair)
503 {
504 // There's nothing to open when reading from stdin.
505 if (pair->src_name == stdin_filename) {
506 pair->src_fd = STDIN_FILENO;
507 #ifdef TUKLIB_DOSLIKE
508 setmode(STDIN_FILENO, O_BINARY);
509 #else
510 // Try to set stdin to non-blocking mode. It won't work
511 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such
512 // case we proceed as if stdin were non-blocking anyway
513 // (in case of /dev/null it will be in practice). The
514 // same applies to stdout in io_open_dest_real().
515 stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
516 if (stdin_flags == -1) {
517 message_error(_("Error getting the file status flags "
518 "from standard input: %s"),
519 strerror(errno));
520 return true;
521 }
522
523 if ((stdin_flags & O_NONBLOCK) == 0
524 && fcntl(STDIN_FILENO, F_SETFL,
525 stdin_flags | O_NONBLOCK) != -1)
526 restore_stdin_flags = true;
527 #endif
528 #ifdef HAVE_POSIX_FADVISE
529 // It will fail if stdin is a pipe and that's fine.
530 (void)posix_fadvise(STDIN_FILENO, 0, 0,
531 opt_mode == MODE_LIST
532 ? POSIX_FADV_RANDOM
533 : POSIX_FADV_SEQUENTIAL);
534 #endif
535 return false;
536 }
537
538 // Symlinks are not followed unless writing to stdout or --force
539 // was used.
540 const bool follow_symlinks = opt_stdout || opt_force;
541
542 // We accept only regular files if we are writing the output
543 // to disk too. bzip2 allows overriding this with --force but
544 // gzip and xz don't.
545 const bool reg_files_only = !opt_stdout;
546
547 // Flags for open()
548 int flags = O_RDONLY | O_BINARY | O_NOCTTY;
549
550 #ifndef TUKLIB_DOSLIKE
551 // Use non-blocking I/O:
552 // - It prevents blocking when opening FIFOs and some other
553 // special files, which is good if we want to accept only
554 // regular files.
555 // - It can help avoiding some race conditions with signal handling.
556 flags |= O_NONBLOCK;
557 #endif
558
559 #if defined(O_NOFOLLOW)
560 if (!follow_symlinks)
561 flags |= O_NOFOLLOW;
562 #elif !defined(TUKLIB_DOSLIKE)
563 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
564 // by POSIX). Check for symlinks with a separate lstat() on
565 // these systems.
566 if (!follow_symlinks) {
567 struct stat st;
568 if (lstat(pair->src_name, &st)) {
569 message_error("%s: %s", pair->src_name,
570 strerror(errno));
571 return true;
572
573 } else if (S_ISLNK(st.st_mode)) {
574 message_warning(_("%s: Is a symbolic link, "
575 "skipping"), pair->src_name);
576 return true;
577 }
578 }
579 #else
580 // Avoid warnings.
581 (void)follow_symlinks;
582 #endif
583
584 // Try to open the file. Signals have been blocked so EINTR shouldn't
585 // be possible.
586 pair->src_fd = open(pair->src_name, flags);
587
588 if (pair->src_fd == -1) {
589 // Signals (that have a signal handler) have been blocked.
590 assert(errno != EINTR);
591
592 #ifdef O_NOFOLLOW
593 // Give an understandable error message if the reason
594 // for failing was that the file was a symbolic link.
595 //
596 // Note that at least Linux, OpenBSD, Solaris, and Darwin
597 // use ELOOP to indicate that O_NOFOLLOW was the reason
598 // that open() failed. Because there may be
599 // directories in the pathname, ELOOP may occur also
600 // because of a symlink loop in the directory part.
601 // So ELOOP doesn't tell us what actually went wrong,
602 // and this stupidity went into POSIX-1.2008 too.
603 //
604 // FreeBSD associates EMLINK with O_NOFOLLOW and
605 // Tru64 uses ENOTSUP. We use these directly here
606 // and skip the lstat() call and the associated race.
607 // I want to hear if there are other kernels that
608 // fail with something else than ELOOP with O_NOFOLLOW.
609 bool was_symlink = false;
610
611 # if defined(__FreeBSD__) || defined(__DragonFly__)
612 if (errno == EMLINK)
613 was_symlink = true;
614
615 # elif defined(__digital__) && defined(__unix__)
616 if (errno == ENOTSUP)
617 was_symlink = true;
618
619 # elif defined(__NetBSD__)
620 if (errno == EFTYPE)
621 was_symlink = true;
622
623 # else
624 if (errno == ELOOP && !follow_symlinks) {
625 const int saved_errno = errno;
626 struct stat st;
627 if (lstat(pair->src_name, &st) == 0
628 && S_ISLNK(st.st_mode))
629 was_symlink = true;
630
631 errno = saved_errno;
632 }
633 # endif
634
635 if (was_symlink)
636 message_warning(_("%s: Is a symbolic link, "
637 "skipping"), pair->src_name);
638 else
639 #endif
640 // Something else than O_NOFOLLOW failing
641 // (assuming that the race conditions didn't
642 // confuse us).
643 message_error("%s: %s", pair->src_name,
644 strerror(errno));
645
646 return true;
647 }
648
649 // Stat the source file. We need the result also when we copy
650 // the permissions, and when unlinking.
651 //
652 // NOTE: Use stat() instead of fstat() with DJGPP, because
653 // then we have a better chance to get st_ino value that can
654 // be used in io_open_dest_real() to prevent overwriting the
655 // source file.
656 #ifdef __DJGPP__
657 if (stat(pair->src_name, &pair->src_st))
658 goto error_msg;
659 #else
660 if (fstat(pair->src_fd, &pair->src_st))
661 goto error_msg;
662 #endif
663
664 if (S_ISDIR(pair->src_st.st_mode)) {
665 message_warning(_("%s: Is a directory, skipping"),
666 pair->src_name);
667 goto error;
668 }
669
670 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
671 message_warning(_("%s: Not a regular file, skipping"),
672 pair->src_name);
673 goto error;
674 }
675
676 #ifndef TUKLIB_DOSLIKE
677 if (reg_files_only && !opt_force) {
678 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
679 // gzip rejects setuid and setgid files even
680 // when --force was used. bzip2 doesn't check
681 // for them, but calls fchown() after fchmod(),
682 // and many systems automatically drop setuid
683 // and setgid bits there.
684 //
685 // We accept setuid and setgid files if
686 // --force was used. We drop these bits
687 // explicitly in io_copy_attr().
688 message_warning(_("%s: File has setuid or "
689 "setgid bit set, skipping"),
690 pair->src_name);
691 goto error;
692 }
693
694 if (pair->src_st.st_mode & S_ISVTX) {
695 message_warning(_("%s: File has sticky bit "
696 "set, skipping"),
697 pair->src_name);
698 goto error;
699 }
700
701 if (pair->src_st.st_nlink > 1) {
702 message_warning(_("%s: Input file has more "
703 "than one hard link, "
704 "skipping"), pair->src_name);
705 goto error;
706 }
707 }
708
709 // If it is something else than a regular file, wait until
710 // there is input available. This way reading from FIFOs
711 // will work when open() is used with O_NONBLOCK.
712 if (!S_ISREG(pair->src_st.st_mode)) {
713 signals_unblock();
714 const io_wait_ret ret = io_wait(pair, -1, true);
715 signals_block();
716
717 if (ret != IO_WAIT_MORE)
718 goto error;
719 }
720 #endif
721
722 #ifdef HAVE_POSIX_FADVISE
723 // It will fail with some special files like FIFOs but that is fine.
724 (void)posix_fadvise(pair->src_fd, 0, 0,
725 opt_mode == MODE_LIST
726 ? POSIX_FADV_RANDOM
727 : POSIX_FADV_SEQUENTIAL);
728 #endif
729
730 return false;
731
732 error_msg:
733 message_error("%s: %s", pair->src_name, strerror(errno));
734 error:
735 (void)close(pair->src_fd);
736 return true;
737 }
738
739
740 extern file_pair *
io_open_src(const char * src_name)741 io_open_src(const char *src_name)
742 {
743 if (is_empty_filename(src_name))
744 return NULL;
745
746 // Since we have only one file open at a time, we can use
747 // a statically allocated structure.
748 static file_pair pair;
749
750 pair = (file_pair){
751 .src_name = src_name,
752 .dest_name = NULL,
753 .src_fd = -1,
754 .dest_fd = -1,
755 .src_eof = false,
756 .src_has_seen_input = false,
757 .flush_needed = false,
758 .dest_try_sparse = false,
759 .dest_pending_sparse = 0,
760 };
761
762 // Block the signals, for which we have a custom signal handler, so
763 // that we don't need to worry about EINTR.
764 signals_block();
765 const bool error = io_open_src_real(&pair);
766 signals_unblock();
767
768 #ifdef ENABLE_SANDBOX
769 if (!error)
770 io_sandbox_enter(pair.src_fd);
771 #endif
772
773 return error ? NULL : &pair;
774 }
775
776
777 /// \brief Closes source file of the file_pair structure
778 ///
779 /// \param pair File whose src_fd should be closed
780 /// \param success If true, the file will be removed from the disk if
781 /// closing succeeds and --keep hasn't been used.
782 static void
io_close_src(file_pair * pair,bool success)783 io_close_src(file_pair *pair, bool success)
784 {
785 #ifndef TUKLIB_DOSLIKE
786 if (restore_stdin_flags) {
787 assert(pair->src_fd == STDIN_FILENO);
788
789 restore_stdin_flags = false;
790
791 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
792 message_error(_("Error restoring the status flags "
793 "to standard input: %s"),
794 strerror(errno));
795 }
796 #endif
797
798 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
799 // Close the file before possibly unlinking it. On DOS-like
800 // systems this is always required since unlinking will fail
801 // if the file is open. On POSIX systems it usually works
802 // to unlink open files, but in some cases it doesn't and
803 // one gets EBUSY in errno.
804 //
805 // xz 5.2.2 and older unlinked the file before closing it
806 // (except on DOS-like systems). The old code didn't handle
807 // EBUSY and could fail e.g. on some CIFS shares. The
808 // advantage of unlinking before closing is negligible
809 // (avoids a race between close() and stat()/lstat() and
810 // unlink()), so let's keep this simple.
811 (void)close(pair->src_fd);
812
813 if (success && !opt_keep_original)
814 io_unlink(pair->src_name, &pair->src_st);
815 }
816
817 return;
818 }
819
820
821 static bool
io_open_dest_real(file_pair * pair)822 io_open_dest_real(file_pair *pair)
823 {
824 if (opt_stdout || pair->src_fd == STDIN_FILENO) {
825 // We don't modify or free() this.
826 pair->dest_name = (char *)"(stdout)";
827 pair->dest_fd = STDOUT_FILENO;
828 #ifdef TUKLIB_DOSLIKE
829 setmode(STDOUT_FILENO, O_BINARY);
830 #else
831 // Try to set O_NONBLOCK if it isn't already set.
832 // If it fails, we assume that stdout is non-blocking
833 // in practice. See the comments in io_open_src_real()
834 // for similar situation with stdin.
835 //
836 // NOTE: O_APPEND may be unset later in this function
837 // and it relies on stdout_flags being set here.
838 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
839 if (stdout_flags == -1) {
840 message_error(_("Error getting the file status flags "
841 "from standard output: %s"),
842 strerror(errno));
843 return true;
844 }
845
846 if ((stdout_flags & O_NONBLOCK) == 0
847 && fcntl(STDOUT_FILENO, F_SETFL,
848 stdout_flags | O_NONBLOCK) != -1)
849 restore_stdout_flags = true;
850 #endif
851 } else {
852 pair->dest_name = suffix_get_dest_name(pair->src_name);
853 if (pair->dest_name == NULL)
854 return true;
855
856 #ifdef __DJGPP__
857 struct stat st;
858 if (stat(pair->dest_name, &st) == 0) {
859 // Check that it isn't a special file like "prn".
860 if (st.st_dev == -1) {
861 message_error("%s: Refusing to write to "
862 "a DOS special file",
863 pair->dest_name);
864 free(pair->dest_name);
865 return true;
866 }
867
868 // Check that we aren't overwriting the source file.
869 if (st.st_dev == pair->src_st.st_dev
870 && st.st_ino == pair->src_st.st_ino) {
871 message_error("%s: Output file is the same "
872 "as the input file",
873 pair->dest_name);
874 free(pair->dest_name);
875 return true;
876 }
877 }
878 #endif
879
880 // If --force was used, unlink the target file first.
881 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
882 message_error(_("%s: Cannot remove: %s"),
883 pair->dest_name, strerror(errno));
884 free(pair->dest_name);
885 return true;
886 }
887
888 // Open the file.
889 int flags = O_WRONLY | O_BINARY | O_NOCTTY
890 | O_CREAT | O_EXCL;
891 #ifndef TUKLIB_DOSLIKE
892 flags |= O_NONBLOCK;
893 #endif
894 const mode_t mode = S_IRUSR | S_IWUSR;
895 pair->dest_fd = open(pair->dest_name, flags, mode);
896
897 if (pair->dest_fd == -1) {
898 message_error("%s: %s", pair->dest_name,
899 strerror(errno));
900 free(pair->dest_name);
901 return true;
902 }
903 }
904
905 #ifndef TUKLIB_DOSLIKE
906 // dest_st isn't used on DOS-like systems except as a dummy
907 // argument to io_unlink(), so don't fstat() on such systems.
908 if (fstat(pair->dest_fd, &pair->dest_st)) {
909 // If fstat() really fails, we have a safe fallback here.
910 # if defined(__VMS)
911 pair->dest_st.st_ino[0] = 0;
912 pair->dest_st.st_ino[1] = 0;
913 pair->dest_st.st_ino[2] = 0;
914 # else
915 pair->dest_st.st_dev = 0;
916 pair->dest_st.st_ino = 0;
917 # endif
918 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
919 // When writing to standard output, we need to be extra
920 // careful:
921 // - It may be connected to something else than
922 // a regular file.
923 // - We aren't necessarily writing to a new empty file
924 // or to the end of an existing file.
925 // - O_APPEND may be active.
926 //
927 // TODO: I'm keeping this disabled for DOS-like systems
928 // for now. FAT doesn't support sparse files, but NTFS
929 // does, so maybe this should be enabled on Windows after
930 // some testing.
931 if (pair->dest_fd == STDOUT_FILENO) {
932 if (!S_ISREG(pair->dest_st.st_mode))
933 return false;
934
935 if (stdout_flags & O_APPEND) {
936 // Creating a sparse file is not possible
937 // when O_APPEND is active (it's used by
938 // shell's >> redirection). As I understand
939 // it, it is safe to temporarily disable
940 // O_APPEND in xz, because if someone
941 // happened to write to the same file at the
942 // same time, results would be bad anyway
943 // (users shouldn't assume that xz uses any
944 // specific block size when writing data).
945 //
946 // The write position may be something else
947 // than the end of the file, so we must fix
948 // it to start writing at the end of the file
949 // to imitate O_APPEND.
950 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
951 return false;
952
953 // Construct the new file status flags.
954 // If O_NONBLOCK was set earlier in this
955 // function, it must be kept here too.
956 int flags = stdout_flags & ~O_APPEND;
957 if (restore_stdout_flags)
958 flags |= O_NONBLOCK;
959
960 // If this fcntl() fails, we continue but won't
961 // try to create sparse output. The original
962 // flags will still be restored if needed (to
963 // unset O_NONBLOCK) when the file is finished.
964 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
965 return false;
966
967 // Disabling O_APPEND succeeded. Mark
968 // that the flags should be restored
969 // in io_close_dest(). (This may have already
970 // been set when enabling O_NONBLOCK.)
971 restore_stdout_flags = true;
972
973 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
974 != pair->dest_st.st_size) {
975 // Writing won't start exactly at the end
976 // of the file. We cannot use sparse output,
977 // because it would probably corrupt the file.
978 return false;
979 }
980 }
981
982 pair->dest_try_sparse = true;
983 }
984 #endif
985
986 return false;
987 }
988
989
990 extern bool
io_open_dest(file_pair * pair)991 io_open_dest(file_pair *pair)
992 {
993 signals_block();
994 const bool ret = io_open_dest_real(pair);
995 signals_unblock();
996 return ret;
997 }
998
999
1000 /// \brief Closes destination file of the file_pair structure
1001 ///
1002 /// \param pair File whose dest_fd should be closed
1003 /// \param success If false, the file will be removed from the disk.
1004 ///
1005 /// \return Zero if closing succeeds. On error, -1 is returned and
1006 /// error message printed.
1007 static bool
io_close_dest(file_pair * pair,bool success)1008 io_close_dest(file_pair *pair, bool success)
1009 {
1010 #ifndef TUKLIB_DOSLIKE
1011 // If io_open_dest() has disabled O_APPEND, restore it here.
1012 if (restore_stdout_flags) {
1013 assert(pair->dest_fd == STDOUT_FILENO);
1014
1015 restore_stdout_flags = false;
1016
1017 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
1018 message_error(_("Error restoring the O_APPEND flag "
1019 "to standard output: %s"),
1020 strerror(errno));
1021 return true;
1022 }
1023 }
1024 #endif
1025
1026 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1027 return false;
1028
1029 if (close(pair->dest_fd)) {
1030 message_error(_("%s: Closing the file failed: %s"),
1031 pair->dest_name, strerror(errno));
1032
1033 // Closing destination file failed, so we cannot trust its
1034 // contents. Get rid of junk:
1035 io_unlink(pair->dest_name, &pair->dest_st);
1036 free(pair->dest_name);
1037 return true;
1038 }
1039
1040 // If the operation using this file wasn't successful, we git rid
1041 // of the junk file.
1042 if (!success)
1043 io_unlink(pair->dest_name, &pair->dest_st);
1044
1045 free(pair->dest_name);
1046
1047 return false;
1048 }
1049
1050
1051 extern void
io_close(file_pair * pair,bool success)1052 io_close(file_pair *pair, bool success)
1053 {
1054 // Take care of sparseness at the end of the output file.
1055 if (success && pair->dest_try_sparse
1056 && pair->dest_pending_sparse > 0) {
1057 // Seek forward one byte less than the size of the pending
1058 // hole, then write one zero-byte. This way the file grows
1059 // to its correct size. An alternative would be to use
1060 // ftruncate() but that isn't portable enough (e.g. it
1061 // doesn't work with FAT on Linux; FAT isn't that important
1062 // since it doesn't support sparse files anyway, but we don't
1063 // want to create corrupt files on it).
1064 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1065 SEEK_CUR) == -1) {
1066 message_error(_("%s: Seeking failed when trying "
1067 "to create a sparse file: %s"),
1068 pair->dest_name, strerror(errno));
1069 success = false;
1070 } else {
1071 const uint8_t zero[1] = { '\0' };
1072 if (io_write_buf(pair, zero, 1))
1073 success = false;
1074 }
1075 }
1076
1077 signals_block();
1078
1079 // Copy the file attributes. We need to skip this if destination
1080 // file isn't open or it is standard output.
1081 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1082 io_copy_attrs(pair);
1083
1084 // Close the destination first. If it fails, we must not remove
1085 // the source file!
1086 if (io_close_dest(pair, success))
1087 success = false;
1088
1089 // Close the source file, and unlink it if the operation using this
1090 // file pair was successful and we haven't requested to keep the
1091 // source file.
1092 io_close_src(pair, success);
1093
1094 signals_unblock();
1095
1096 return;
1097 }
1098
1099
1100 extern void
io_fix_src_pos(file_pair * pair,size_t rewind_size)1101 io_fix_src_pos(file_pair *pair, size_t rewind_size)
1102 {
1103 assert(rewind_size <= IO_BUFFER_SIZE);
1104
1105 if (rewind_size > 0) {
1106 // This doesn't need to work on unseekable file descriptors,
1107 // so just ignore possible errors.
1108 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1109 }
1110
1111 return;
1112 }
1113
1114
1115 extern size_t
io_read(file_pair * pair,io_buf * buf,size_t size)1116 io_read(file_pair *pair, io_buf *buf, size_t size)
1117 {
1118 // We use small buffers here.
1119 assert(size < SSIZE_MAX);
1120
1121 size_t pos = 0;
1122
1123 while (pos < size) {
1124 const ssize_t amount = read(
1125 pair->src_fd, buf->u8 + pos, size - pos);
1126
1127 if (amount == 0) {
1128 pair->src_eof = true;
1129 break;
1130 }
1131
1132 if (amount == -1) {
1133 if (errno == EINTR) {
1134 if (user_abort)
1135 return SIZE_MAX;
1136
1137 continue;
1138 }
1139
1140 #ifndef TUKLIB_DOSLIKE
1141 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1142 // Disable the flush-timeout if no input has
1143 // been seen since the previous flush and thus
1144 // there would be nothing to flush after the
1145 // timeout expires (avoids busy waiting).
1146 const int timeout = pair->src_has_seen_input
1147 ? mytime_get_flush_timeout()
1148 : -1;
1149
1150 switch (io_wait(pair, timeout, true)) {
1151 case IO_WAIT_MORE:
1152 continue;
1153
1154 case IO_WAIT_ERROR:
1155 return SIZE_MAX;
1156
1157 case IO_WAIT_TIMEOUT:
1158 pair->flush_needed = true;
1159 return pos;
1160
1161 default:
1162 message_bug();
1163 }
1164 }
1165 #endif
1166
1167 message_error(_("%s: Read error: %s"),
1168 pair->src_name, strerror(errno));
1169
1170 return SIZE_MAX;
1171 }
1172
1173 pos += (size_t)(amount);
1174
1175 if (!pair->src_has_seen_input) {
1176 pair->src_has_seen_input = true;
1177 mytime_set_flush_time();
1178 }
1179 }
1180
1181 return pos;
1182 }
1183
1184
1185 extern bool
io_pread(file_pair * pair,io_buf * buf,size_t size,off_t pos)1186 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
1187 {
1188 // Using lseek() and read() is more portable than pread() and
1189 // for us it is as good as real pread().
1190 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
1191 message_error(_("%s: Error seeking the file: %s"),
1192 pair->src_name, strerror(errno));
1193 return true;
1194 }
1195
1196 const size_t amount = io_read(pair, buf, size);
1197 if (amount == SIZE_MAX)
1198 return true;
1199
1200 if (amount != size) {
1201 message_error(_("%s: Unexpected end of file"),
1202 pair->src_name);
1203 return true;
1204 }
1205
1206 return false;
1207 }
1208
1209
1210 static bool
is_sparse(const io_buf * buf)1211 is_sparse(const io_buf *buf)
1212 {
1213 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1214
1215 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1216 if (buf->u64[i] != 0)
1217 return false;
1218
1219 return true;
1220 }
1221
1222
1223 static bool
io_write_buf(file_pair * pair,const uint8_t * buf,size_t size)1224 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1225 {
1226 assert(size < SSIZE_MAX);
1227
1228 while (size > 0) {
1229 const ssize_t amount = write(pair->dest_fd, buf, size);
1230 if (amount == -1) {
1231 if (errno == EINTR) {
1232 if (user_abort)
1233 return true;
1234
1235 continue;
1236 }
1237
1238 #ifndef TUKLIB_DOSLIKE
1239 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1240 if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1241 continue;
1242
1243 return true;
1244 }
1245 #endif
1246
1247 // Handle broken pipe specially. gzip and bzip2
1248 // don't print anything on SIGPIPE. In addition,
1249 // gzip --quiet uses exit status 2 (warning) on
1250 // broken pipe instead of whatever raise(SIGPIPE)
1251 // would make it return. It is there to hide "Broken
1252 // pipe" message on some old shells (probably old
1253 // GNU bash).
1254 //
1255 // We don't do anything special with --quiet, which
1256 // is what bzip2 does too. If we get SIGPIPE, we
1257 // will handle it like other signals by setting
1258 // user_abort, and get EPIPE here.
1259 if (errno != EPIPE)
1260 message_error(_("%s: Write error: %s"),
1261 pair->dest_name, strerror(errno));
1262
1263 return true;
1264 }
1265
1266 buf += (size_t)(amount);
1267 size -= (size_t)(amount);
1268 }
1269
1270 return false;
1271 }
1272
1273
1274 extern bool
io_write(file_pair * pair,const io_buf * buf,size_t size)1275 io_write(file_pair *pair, const io_buf *buf, size_t size)
1276 {
1277 assert(size <= IO_BUFFER_SIZE);
1278
1279 if (pair->dest_try_sparse) {
1280 // Check if the block is sparse (contains only zeros). If it
1281 // sparse, we just store the amount and return. We will take
1282 // care of actually skipping over the hole when we hit the
1283 // next data block or close the file.
1284 //
1285 // Since io_close() requires that dest_pending_sparse > 0
1286 // if the file ends with sparse block, we must also return
1287 // if size == 0 to avoid doing the lseek().
1288 if (size == IO_BUFFER_SIZE) {
1289 // Even if the block was sparse, treat it as non-sparse
1290 // if the pending sparse amount is large compared to
1291 // the size of off_t. In practice this only matters
1292 // on 32-bit systems where off_t isn't always 64 bits.
1293 const off_t pending_max
1294 = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
1295 if (is_sparse(buf) && pair->dest_pending_sparse
1296 < pending_max) {
1297 pair->dest_pending_sparse += (off_t)(size);
1298 return false;
1299 }
1300 } else if (size == 0) {
1301 return false;
1302 }
1303
1304 // This is not a sparse block. If we have a pending hole,
1305 // skip it now.
1306 if (pair->dest_pending_sparse > 0) {
1307 if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1308 SEEK_CUR) == -1) {
1309 message_error(_("%s: Seeking failed when "
1310 "trying to create a sparse "
1311 "file: %s"), pair->dest_name,
1312 strerror(errno));
1313 return true;
1314 }
1315
1316 pair->dest_pending_sparse = 0;
1317 }
1318 }
1319
1320 return io_write_buf(pair, buf->u8, size);
1321 }
1322