1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file file_io.c
4 /// \brief File opening, unlinking, and closing
5 //
6 // Author: Lasse Collin
7 //
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12
13 #include "private.h"
14
15 #include <fcntl.h>
16
17 #ifdef TUKLIB_DOSLIKE
18 # include <io.h>
19 #else
20 # include <poll.h>
21 static bool warn_fchown;
22 #endif
23
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 # include <sys/time.h>
26 #elif defined(HAVE_UTIME)
27 # include <utime.h>
28 #endif
29
30 #include "tuklib_open_stdxxx.h"
31
32 #ifndef O_BINARY
33 # define O_BINARY 0
34 #endif
35
36 #ifndef O_NOCTTY
37 # define O_NOCTTY 0
38 #endif
39
40
/// Result of io_wait()
typedef enum {
	/// Reading or writing is possible.
	IO_WAIT_MORE,

	/// poll() failed or user_abort was set.
	IO_WAIT_ERROR,

	/// poll() timed out.
	IO_WAIT_TIMEOUT,
} io_wait_ret;
46
47
/// If true, try to create sparse files when decompressing.
/// io_no_sparse() sets this to false.
static bool try_sparse = true;

#ifndef TUKLIB_DOSLIKE
/// File status flags of standard input as returned by fcntl(F_GETFL).
/// This is used by io_open_src() and io_close_src().
static int stdin_flags;

/// True if io_close_src() has to restore stdin_flags to standard input.
static bool restore_stdin_flags = false;

/// Original file status flags of standard output. This is used by
/// io_open_dest() and io_close_dest() to save and restore the flags.
static int stdout_flags;

/// True if io_close_dest() has to restore stdout_flags to standard output.
static bool restore_stdout_flags = false;

/// Self-pipe used together with the user_abort variable to avoid
/// race conditions with signal handling. io_write_to_user_abort_pipe()
/// writes to user_abort_pipe[1] and io_wait() polls user_abort_pipe[0].
static int user_abort_pipe[2];
#endif


// Forward declaration: io_close() calls this before its definition
// appears later in this file.
static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
69
70
71 extern void
io_init(void)72 io_init(void)
73 {
74 // Make sure that stdin, stdout, and stderr are connected to
75 // a valid file descriptor. Exit immediately with exit code ERROR
76 // if we cannot make the file descriptors valid. Maybe we should
77 // print an error message, but our stderr could be screwed anyway.
78 tuklib_open_stdxxx(E_ERROR);
79
80 #ifndef TUKLIB_DOSLIKE
81 // If fchown() fails setting the owner, we warn about it only if
82 // we are root.
83 warn_fchown = geteuid() == 0;
84
85 // Create a pipe for the self-pipe trick. If pipe2() is available,
86 // we can avoid the fcntl() calls.
87 # ifdef HAVE_PIPE2
88 if (pipe2(user_abort_pipe, O_NONBLOCK))
89 message_fatal(_("Error creating a pipe: %s"),
90 strerror(errno));
91 # else
92 if (pipe(user_abort_pipe))
93 message_fatal(_("Error creating a pipe: %s"),
94 strerror(errno));
95
96 // Make both ends of the pipe non-blocking.
97 for (unsigned i = 0; i < 2; ++i) {
98 int flags = fcntl(user_abort_pipe[i], F_GETFL);
99 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
100 flags | O_NONBLOCK) == -1)
101 message_fatal(_("Error creating a pipe: %s"),
102 strerror(errno));
103 }
104 # endif
105 #endif
106
107 #ifdef __DJGPP__
108 // Avoid doing useless things when statting files.
109 // This isn't important but doesn't hurt.
110 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
111 #endif
112
113 return;
114 }
115
116
117 #ifndef TUKLIB_DOSLIKE
118 extern void
io_write_to_user_abort_pipe(void)119 io_write_to_user_abort_pipe(void)
120 {
121 // If the write() fails, it's probably due to the pipe being full.
122 // Failing in that case is fine. If the reason is something else,
123 // there's not much we can do since this is called in a signal
124 // handler. So ignore the errors and try to avoid warnings with
125 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
126 uint8_t b = '\0';
127 const int ret = write(user_abort_pipe[1], &b, 1);
128 (void)ret;
129 return;
130 }
131 #endif
132
133
134 extern void
io_no_sparse(void)135 io_no_sparse(void)
136 {
137 try_sparse = false;
138 return;
139 }
140
141
142 #ifndef TUKLIB_DOSLIKE
143 /// \brief Waits for input or output to become available or for a signal
144 ///
145 /// This uses the self-pipe trick to avoid a race condition that can occur
146 /// if a signal is caught after user_abort has been checked but before e.g.
147 /// read() has been called. In that situation read() could block unless
148 /// non-blocking I/O is used. With non-blocking I/O something like select()
149 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
150 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
151 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
152 /// old and very portable.
153 static io_wait_ret
io_wait(file_pair * pair,int timeout,bool is_reading)154 io_wait(file_pair *pair, int timeout, bool is_reading)
155 {
156 struct pollfd pfd[2];
157
158 if (is_reading) {
159 pfd[0].fd = pair->src_fd;
160 pfd[0].events = POLLIN;
161 } else {
162 pfd[0].fd = pair->dest_fd;
163 pfd[0].events = POLLOUT;
164 }
165
166 pfd[1].fd = user_abort_pipe[0];
167 pfd[1].events = POLLIN;
168
169 while (true) {
170 const int ret = poll(pfd, 2, timeout);
171
172 if (user_abort)
173 return IO_WAIT_ERROR;
174
175 if (ret == -1) {
176 if (errno == EINTR || errno == EAGAIN)
177 continue;
178
179 message_error(_("%s: poll() failed: %s"),
180 is_reading ? pair->src_name
181 : pair->dest_name,
182 strerror(errno));
183 return IO_WAIT_ERROR;
184 }
185
186 if (ret == 0) {
187 assert(opt_flush_timeout != 0);
188 flush_needed = true;
189 return IO_WAIT_TIMEOUT;
190 }
191
192 if (pfd[0].revents != 0)
193 return IO_WAIT_MORE;
194 }
195 }
196 #endif
197
198
199 /// \brief Unlink a file
200 ///
201 /// This tries to verify that the file being unlinked really is the file that
202 /// we want to unlink by verifying device and inode numbers. There's still
203 /// a small unavoidable race, but this is much better than nothing (the file
204 /// could have been moved/replaced even hours earlier).
205 static void
io_unlink(const char * name,const struct stat * known_st)206 io_unlink(const char *name, const struct stat *known_st)
207 {
208 #if defined(TUKLIB_DOSLIKE)
209 // On DOS-like systems, st_ino is meaningless, so don't bother
210 // testing it. Just silence a compiler warning.
211 (void)known_st;
212 #else
213 struct stat new_st;
214
215 // If --force was used, use stat() instead of lstat(). This way
216 // (de)compressing symlinks works correctly. However, it also means
217 // that xz cannot detect if a regular file foo is renamed to bar
218 // and then a symlink foo -> bar is created. Because of stat()
219 // instead of lstat(), xz will think that foo hasn't been replaced
220 // with another file. Thus, xz will remove foo even though it no
221 // longer is the same file that xz used when it started compressing.
222 // Probably it's not too bad though, so this doesn't need a more
223 // complex fix.
224 const int stat_ret = opt_force
225 ? stat(name, &new_st) : lstat(name, &new_st);
226
227 if (stat_ret
228 # ifdef __VMS
229 // st_ino is an array, and we don't want to
230 // compare st_dev at all.
231 || memcmp(&new_st.st_ino, &known_st->st_ino,
232 sizeof(new_st.st_ino)) != 0
233 # else
234 // Typical POSIX-like system
235 || new_st.st_dev != known_st->st_dev
236 || new_st.st_ino != known_st->st_ino
237 # endif
238 )
239 // TRANSLATORS: When compression or decompression finishes,
240 // and xz is going to remove the source file, xz first checks
241 // if the source file still exists, and if it does, does its
242 // device and inode numbers match what xz saw when it opened
243 // the source file. If these checks fail, this message is
244 // shown, %s being the filename, and the file is not deleted.
245 // The check for device and inode numbers is there, because
246 // it is possible that the user has put a new file in place
247 // of the original file, and in that case it obviously
248 // shouldn't be removed.
249 message_error(_("%s: File seems to have been moved, "
250 "not removing"), name);
251 else
252 #endif
253 // There's a race condition between lstat() and unlink()
254 // but at least we have tried to avoid removing wrong file.
255 if (unlink(name))
256 message_error(_("%s: Cannot remove: %s"),
257 name, strerror(errno));
258
259 return;
260 }
261
262
263 /// \brief Copies owner/group and permissions
264 ///
265 /// \todo ACL and EA support
266 ///
267 static void
io_copy_attrs(const file_pair * pair)268 io_copy_attrs(const file_pair *pair)
269 {
270 // Skip chown and chmod on Windows.
271 #ifndef TUKLIB_DOSLIKE
272 // This function is more tricky than you may think at first.
273 // Blindly copying permissions may permit users to access the
274 // destination file who didn't have permission to access the
275 // source file.
276
277 // Try changing the owner of the file. If we aren't root or the owner
278 // isn't already us, fchown() probably doesn't succeed. We warn
279 // about failing fchown() only if we are root.
280 if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
281 message_warning(_("%s: Cannot set the file owner: %s"),
282 pair->dest_name, strerror(errno));
283
284 mode_t mode;
285
286 if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
287 message_warning(_("%s: Cannot set the file group: %s"),
288 pair->dest_name, strerror(errno));
289 // We can still safely copy some additional permissions:
290 // `group' must be at least as strict as `other' and
291 // also vice versa.
292 //
293 // NOTE: After this, the owner of the source file may
294 // get additional permissions. This shouldn't be too bad,
295 // because the owner would have had permission to chmod
296 // the original file anyway.
297 mode = ((pair->src_st.st_mode & 0070) >> 3)
298 & (pair->src_st.st_mode & 0007);
299 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
300 } else {
301 // Drop the setuid, setgid, and sticky bits.
302 mode = pair->src_st.st_mode & 0777;
303 }
304
305 if (fchmod(pair->dest_fd, mode))
306 message_warning(_("%s: Cannot set the file permissions: %s"),
307 pair->dest_name, strerror(errno));
308 #endif
309
310 // Copy the timestamps. We have several possible ways to do this, of
311 // which some are better in both security and precision.
312 //
313 // First, get the nanosecond part of the timestamps. As of writing,
314 // it's not standardized by POSIX, and there are several names for
315 // the same thing in struct stat.
316 long atime_nsec;
317 long mtime_nsec;
318
319 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
320 // GNU and Solaris
321 atime_nsec = pair->src_st.st_atim.tv_nsec;
322 mtime_nsec = pair->src_st.st_mtim.tv_nsec;
323
324 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
325 // BSD
326 atime_nsec = pair->src_st.st_atimespec.tv_nsec;
327 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
328
329 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
330 // GNU and BSD without extensions
331 atime_nsec = pair->src_st.st_atimensec;
332 mtime_nsec = pair->src_st.st_mtimensec;
333
334 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
335 // Tru64
336 atime_nsec = pair->src_st.st_uatime * 1000;
337 mtime_nsec = pair->src_st.st_umtime * 1000;
338
339 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
340 // UnixWare
341 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
342 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
343
344 # else
345 // Safe fallback
346 atime_nsec = 0;
347 mtime_nsec = 0;
348 # endif
349
350 // Construct a structure to hold the timestamps and call appropriate
351 // function to set the timestamps.
352 #if defined(HAVE_FUTIMENS)
353 // Use nanosecond precision.
354 struct timespec tv[2];
355 tv[0].tv_sec = pair->src_st.st_atime;
356 tv[0].tv_nsec = atime_nsec;
357 tv[1].tv_sec = pair->src_st.st_mtime;
358 tv[1].tv_nsec = mtime_nsec;
359
360 (void)futimens(pair->dest_fd, tv);
361
362 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
363 // Use microsecond precision.
364 struct timeval tv[2];
365 tv[0].tv_sec = pair->src_st.st_atime;
366 tv[0].tv_usec = atime_nsec / 1000;
367 tv[1].tv_sec = pair->src_st.st_mtime;
368 tv[1].tv_usec = mtime_nsec / 1000;
369
370 # if defined(HAVE_FUTIMES)
371 (void)futimes(pair->dest_fd, tv);
372 # elif defined(HAVE_FUTIMESAT)
373 (void)futimesat(pair->dest_fd, NULL, tv);
374 # else
375 // Argh, no function to use a file descriptor to set the timestamp.
376 (void)utimes(pair->dest_name, tv);
377 # endif
378
379 #elif defined(HAVE_UTIME)
380 // Use one-second precision. utime() doesn't support using file
381 // descriptor either. Some systems have broken utime() prototype
382 // so don't make this const.
383 struct utimbuf buf = {
384 .actime = pair->src_st.st_atime,
385 .modtime = pair->src_st.st_mtime,
386 };
387
388 // Avoid warnings.
389 (void)atime_nsec;
390 (void)mtime_nsec;
391
392 (void)utime(pair->dest_name, &buf);
393 #endif
394
395 return;
396 }
397
398
399 /// Opens the source file. Returns false on success, true on error.
400 static bool
io_open_src_real(file_pair * pair)401 io_open_src_real(file_pair *pair)
402 {
403 // There's nothing to open when reading from stdin.
404 if (pair->src_name == stdin_filename) {
405 pair->src_fd = STDIN_FILENO;
406 #ifdef TUKLIB_DOSLIKE
407 setmode(STDIN_FILENO, O_BINARY);
408 #else
409 // Try to set stdin to non-blocking mode. It won't work
410 // e.g. on OpenBSD if stdout is e.g. /dev/null. In such
411 // case we proceed as if stdin were non-blocking anyway
412 // (in case of /dev/null it will be in practice). The
413 // same applies to stdout in io_open_dest_real().
414 stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
415 if (stdin_flags == -1) {
416 message_error(_("Error getting the file status flags "
417 "from standard input: %s"),
418 strerror(errno));
419 return true;
420 }
421
422 if ((stdin_flags & O_NONBLOCK) == 0
423 && fcntl(STDIN_FILENO, F_SETFL,
424 stdin_flags | O_NONBLOCK) != -1)
425 restore_stdin_flags = true;
426 #endif
427 #ifdef HAVE_POSIX_FADVISE
428 // It will fail if stdin is a pipe and that's fine.
429 (void)posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL);
430 #endif
431 return false;
432 }
433
434 // Symlinks are not followed unless writing to stdout or --force
435 // was used.
436 const bool follow_symlinks = opt_stdout || opt_force;
437
438 // We accept only regular files if we are writing the output
439 // to disk too. bzip2 allows overriding this with --force but
440 // gzip and xz don't.
441 const bool reg_files_only = !opt_stdout;
442
443 // Flags for open()
444 int flags = O_RDONLY | O_BINARY | O_NOCTTY;
445
446 #ifndef TUKLIB_DOSLIKE
447 // Use non-blocking I/O:
448 // - It prevents blocking when opening FIFOs and some other
449 // special files, which is good if we want to accept only
450 // regular files.
451 // - It can help avoiding some race conditions with signal handling.
452 flags |= O_NONBLOCK;
453 #endif
454
455 #if defined(O_NOFOLLOW)
456 if (!follow_symlinks)
457 flags |= O_NOFOLLOW;
458 #elif !defined(TUKLIB_DOSLIKE)
459 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
460 // by POSIX). Check for symlinks with a separate lstat() on
461 // these systems.
462 if (!follow_symlinks) {
463 struct stat st;
464 if (lstat(pair->src_name, &st)) {
465 message_error("%s: %s", pair->src_name,
466 strerror(errno));
467 return true;
468
469 } else if (S_ISLNK(st.st_mode)) {
470 message_warning(_("%s: Is a symbolic link, "
471 "skipping"), pair->src_name);
472 return true;
473 }
474 }
475 #else
476 // Avoid warnings.
477 (void)follow_symlinks;
478 #endif
479
480 // Try to open the file. Signals have been blocked so EINTR shouldn't
481 // be possible.
482 pair->src_fd = open(pair->src_name, flags);
483
484 if (pair->src_fd == -1) {
485 // Signals (that have a signal handler) have been blocked.
486 assert(errno != EINTR);
487
488 #ifdef O_NOFOLLOW
489 // Give an understandable error message if the reason
490 // for failing was that the file was a symbolic link.
491 //
492 // Note that at least Linux, OpenBSD, Solaris, and Darwin
493 // use ELOOP to indicate that O_NOFOLLOW was the reason
494 // that open() failed. Because there may be
495 // directories in the pathname, ELOOP may occur also
496 // because of a symlink loop in the directory part.
497 // So ELOOP doesn't tell us what actually went wrong,
498 // and this stupidity went into POSIX-1.2008 too.
499 //
500 // FreeBSD associates EMLINK with O_NOFOLLOW and
501 // Tru64 uses ENOTSUP. We use these directly here
502 // and skip the lstat() call and the associated race.
503 // I want to hear if there are other kernels that
504 // fail with something else than ELOOP with O_NOFOLLOW.
505 bool was_symlink = false;
506
507 # if defined(__FreeBSD__) || defined(__DragonFly__)
508 if (errno == EMLINK)
509 was_symlink = true;
510
511 # elif defined(__digital__) && defined(__unix__)
512 if (errno == ENOTSUP)
513 was_symlink = true;
514
515 # elif defined(__NetBSD__)
516 if (errno == EFTYPE)
517 was_symlink = true;
518
519 # else
520 if (errno == ELOOP && !follow_symlinks) {
521 const int saved_errno = errno;
522 struct stat st;
523 if (lstat(pair->src_name, &st) == 0
524 && S_ISLNK(st.st_mode))
525 was_symlink = true;
526
527 errno = saved_errno;
528 }
529 # endif
530
531 if (was_symlink)
532 message_warning(_("%s: Is a symbolic link, "
533 "skipping"), pair->src_name);
534 else
535 #endif
536 // Something else than O_NOFOLLOW failing
537 // (assuming that the race conditions didn't
538 // confuse us).
539 message_error("%s: %s", pair->src_name,
540 strerror(errno));
541
542 return true;
543 }
544
545 // Stat the source file. We need the result also when we copy
546 // the permissions, and when unlinking.
547 //
548 // NOTE: Use stat() instead of fstat() with DJGPP, because
549 // then we have a better chance to get st_ino value that can
550 // be used in io_open_dest_real() to prevent overwriting the
551 // source file.
552 #ifdef __DJGPP__
553 if (stat(pair->src_name, &pair->src_st))
554 goto error_msg;
555 #else
556 if (fstat(pair->src_fd, &pair->src_st))
557 goto error_msg;
558 #endif
559
560 if (S_ISDIR(pair->src_st.st_mode)) {
561 message_warning(_("%s: Is a directory, skipping"),
562 pair->src_name);
563 goto error;
564 }
565
566 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
567 message_warning(_("%s: Not a regular file, skipping"),
568 pair->src_name);
569 goto error;
570 }
571
572 #ifndef TUKLIB_DOSLIKE
573 if (reg_files_only && !opt_force) {
574 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
575 // gzip rejects setuid and setgid files even
576 // when --force was used. bzip2 doesn't check
577 // for them, but calls fchown() after fchmod(),
578 // and many systems automatically drop setuid
579 // and setgid bits there.
580 //
581 // We accept setuid and setgid files if
582 // --force was used. We drop these bits
583 // explicitly in io_copy_attr().
584 message_warning(_("%s: File has setuid or "
585 "setgid bit set, skipping"),
586 pair->src_name);
587 goto error;
588 }
589
590 if (pair->src_st.st_mode & S_ISVTX) {
591 message_warning(_("%s: File has sticky bit "
592 "set, skipping"),
593 pair->src_name);
594 goto error;
595 }
596
597 if (pair->src_st.st_nlink > 1) {
598 message_warning(_("%s: Input file has more "
599 "than one hard link, "
600 "skipping"), pair->src_name);
601 goto error;
602 }
603 }
604
605 // If it is something else than a regular file, wait until
606 // there is input available. This way reading from FIFOs
607 // will work when open() is used with O_NONBLOCK.
608 if (!S_ISREG(pair->src_st.st_mode)) {
609 signals_unblock();
610 const io_wait_ret ret = io_wait(pair, -1, true);
611 signals_block();
612
613 if (ret != IO_WAIT_MORE)
614 goto error;
615 }
616 #endif
617
618 #ifdef HAVE_POSIX_FADVISE
619 // It will fail with some special files like FIFOs but that is fine.
620 (void)posix_fadvise(pair->src_fd, 0, 0, POSIX_FADV_SEQUENTIAL);
621 #endif
622
623 return false;
624
625 error_msg:
626 message_error("%s: %s", pair->src_name, strerror(errno));
627 error:
628 (void)close(pair->src_fd);
629 return true;
630 }
631
632
633 extern file_pair *
io_open_src(const char * src_name)634 io_open_src(const char *src_name)
635 {
636 if (is_empty_filename(src_name))
637 return NULL;
638
639 // Since we have only one file open at a time, we can use
640 // a statically allocated structure.
641 static file_pair pair;
642
643 pair = (file_pair){
644 .src_name = src_name,
645 .dest_name = NULL,
646 .src_fd = -1,
647 .dest_fd = -1,
648 .src_eof = false,
649 .dest_try_sparse = false,
650 .dest_pending_sparse = 0,
651 };
652
653 // Block the signals, for which we have a custom signal handler, so
654 // that we don't need to worry about EINTR.
655 signals_block();
656 const bool error = io_open_src_real(&pair);
657 signals_unblock();
658
659 return error ? NULL : &pair;
660 }
661
662
663 /// \brief Closes source file of the file_pair structure
664 ///
665 /// \param pair File whose src_fd should be closed
666 /// \param success If true, the file will be removed from the disk if
667 /// closing succeeds and --keep hasn't been used.
668 static void
io_close_src(file_pair * pair,bool success)669 io_close_src(file_pair *pair, bool success)
670 {
671 #ifndef TUKLIB_DOSLIKE
672 if (restore_stdin_flags) {
673 assert(pair->src_fd == STDIN_FILENO);
674
675 restore_stdin_flags = false;
676
677 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
678 message_error(_("Error restoring the status flags "
679 "to standard input: %s"),
680 strerror(errno));
681 }
682 #endif
683
684 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
685 #ifdef TUKLIB_DOSLIKE
686 (void)close(pair->src_fd);
687 #endif
688
689 // If we are going to unlink(), do it before closing the file.
690 // This way there's no risk that someone replaces the file and
691 // happens to get same inode number, which would make us
692 // unlink() wrong file.
693 //
694 // NOTE: DOS-like systems are an exception to this, because
695 // they don't allow unlinking files that are open. *sigh*
696 if (success && !opt_keep_original)
697 io_unlink(pair->src_name, &pair->src_st);
698
699 #ifndef TUKLIB_DOSLIKE
700 (void)close(pair->src_fd);
701 #endif
702 }
703
704 return;
705 }
706
707
708 static bool
io_open_dest_real(file_pair * pair)709 io_open_dest_real(file_pair *pair)
710 {
711 if (opt_stdout || pair->src_fd == STDIN_FILENO) {
712 // We don't modify or free() this.
713 pair->dest_name = (char *)"(stdout)";
714 pair->dest_fd = STDOUT_FILENO;
715 #ifdef TUKLIB_DOSLIKE
716 setmode(STDOUT_FILENO, O_BINARY);
717 #else
718 // Try to set O_NONBLOCK if it isn't already set.
719 // If it fails, we assume that stdout is non-blocking
720 // in practice. See the comments in io_open_src_real()
721 // for similar situation with stdin.
722 //
723 // NOTE: O_APPEND may be unset later in this function
724 // and it relies on stdout_flags being set here.
725 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
726 if (stdout_flags == -1) {
727 message_error(_("Error getting the file status flags "
728 "from standard output: %s"),
729 strerror(errno));
730 return true;
731 }
732
733 if ((stdout_flags & O_NONBLOCK) == 0
734 && fcntl(STDOUT_FILENO, F_SETFL,
735 stdout_flags | O_NONBLOCK) != -1)
736 restore_stdout_flags = true;
737 #endif
738 } else {
739 pair->dest_name = suffix_get_dest_name(pair->src_name);
740 if (pair->dest_name == NULL)
741 return true;
742
743 #ifdef __DJGPP__
744 struct stat st;
745 if (stat(pair->dest_name, &st) == 0) {
746 // Check that it isn't a special file like "prn".
747 if (st.st_dev == -1) {
748 message_error("%s: Refusing to write to "
749 "a DOS special file",
750 pair->dest_name);
751 free(pair->dest_name);
752 return true;
753 }
754
755 // Check that we aren't overwriting the source file.
756 if (st.st_dev == pair->src_st.st_dev
757 && st.st_ino == pair->src_st.st_ino) {
758 message_error("%s: Output file is the same "
759 "as the input file",
760 pair->dest_name);
761 free(pair->dest_name);
762 return true;
763 }
764 }
765 #endif
766
767 // If --force was used, unlink the target file first.
768 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
769 message_error(_("%s: Cannot remove: %s"),
770 pair->dest_name, strerror(errno));
771 free(pair->dest_name);
772 return true;
773 }
774
775 // Open the file.
776 int flags = O_WRONLY | O_BINARY | O_NOCTTY
777 | O_CREAT | O_EXCL;
778 #ifndef TUKLIB_DOSLIKE
779 flags |= O_NONBLOCK;
780 #endif
781 const mode_t mode = S_IRUSR | S_IWUSR;
782 pair->dest_fd = open(pair->dest_name, flags, mode);
783
784 if (pair->dest_fd == -1) {
785 message_error("%s: %s", pair->dest_name,
786 strerror(errno));
787 free(pair->dest_name);
788 return true;
789 }
790 }
791
792 #ifndef TUKLIB_DOSLIKE
793 // dest_st isn't used on DOS-like systems except as a dummy
794 // argument to io_unlink(), so don't fstat() on such systems.
795 if (fstat(pair->dest_fd, &pair->dest_st)) {
796 // If fstat() really fails, we have a safe fallback here.
797 # if defined(__VMS)
798 pair->dest_st.st_ino[0] = 0;
799 pair->dest_st.st_ino[1] = 0;
800 pair->dest_st.st_ino[2] = 0;
801 # else
802 pair->dest_st.st_dev = 0;
803 pair->dest_st.st_ino = 0;
804 # endif
805 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
806 // When writing to standard output, we need to be extra
807 // careful:
808 // - It may be connected to something else than
809 // a regular file.
810 // - We aren't necessarily writing to a new empty file
811 // or to the end of an existing file.
812 // - O_APPEND may be active.
813 //
814 // TODO: I'm keeping this disabled for DOS-like systems
815 // for now. FAT doesn't support sparse files, but NTFS
816 // does, so maybe this should be enabled on Windows after
817 // some testing.
818 if (pair->dest_fd == STDOUT_FILENO) {
819 if (!S_ISREG(pair->dest_st.st_mode))
820 return false;
821
822 if (stdout_flags & O_APPEND) {
823 // Creating a sparse file is not possible
824 // when O_APPEND is active (it's used by
825 // shell's >> redirection). As I understand
826 // it, it is safe to temporarily disable
827 // O_APPEND in xz, because if someone
828 // happened to write to the same file at the
829 // same time, results would be bad anyway
830 // (users shouldn't assume that xz uses any
831 // specific block size when writing data).
832 //
833 // The write position may be something else
834 // than the end of the file, so we must fix
835 // it to start writing at the end of the file
836 // to imitate O_APPEND.
837 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
838 return false;
839
840 // Construct the new file status flags.
841 // If O_NONBLOCK was set earlier in this
842 // function, it must be kept here too.
843 int flags = stdout_flags & ~O_APPEND;
844 if (restore_stdout_flags)
845 flags |= O_NONBLOCK;
846
847 // If this fcntl() fails, we continue but won't
848 // try to create sparse output. The original
849 // flags will still be restored if needed (to
850 // unset O_NONBLOCK) when the file is finished.
851 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
852 return false;
853
854 // Disabling O_APPEND succeeded. Mark
855 // that the flags should be restored
856 // in io_close_dest(). (This may have already
857 // been set when enabling O_NONBLOCK.)
858 restore_stdout_flags = true;
859
860 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
861 != pair->dest_st.st_size) {
862 // Writing won't start exactly at the end
863 // of the file. We cannot use sparse output,
864 // because it would probably corrupt the file.
865 return false;
866 }
867 }
868
869 pair->dest_try_sparse = true;
870 }
871 #endif
872
873 return false;
874 }
875
876
877 extern bool
io_open_dest(file_pair * pair)878 io_open_dest(file_pair *pair)
879 {
880 signals_block();
881 const bool ret = io_open_dest_real(pair);
882 signals_unblock();
883 return ret;
884 }
885
886
887 /// \brief Closes destination file of the file_pair structure
888 ///
889 /// \param pair File whose dest_fd should be closed
890 /// \param success If false, the file will be removed from the disk.
891 ///
892 /// \return Zero if closing succeeds. On error, -1 is returned and
893 /// error message printed.
894 static bool
io_close_dest(file_pair * pair,bool success)895 io_close_dest(file_pair *pair, bool success)
896 {
897 #ifndef TUKLIB_DOSLIKE
898 // If io_open_dest() has disabled O_APPEND, restore it here.
899 if (restore_stdout_flags) {
900 assert(pair->dest_fd == STDOUT_FILENO);
901
902 restore_stdout_flags = false;
903
904 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
905 message_error(_("Error restoring the O_APPEND flag "
906 "to standard output: %s"),
907 strerror(errno));
908 return true;
909 }
910 }
911 #endif
912
913 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
914 return false;
915
916 if (close(pair->dest_fd)) {
917 message_error(_("%s: Closing the file failed: %s"),
918 pair->dest_name, strerror(errno));
919
920 // Closing destination file failed, so we cannot trust its
921 // contents. Get rid of junk:
922 io_unlink(pair->dest_name, &pair->dest_st);
923 free(pair->dest_name);
924 return true;
925 }
926
927 // If the operation using this file wasn't successful, we git rid
928 // of the junk file.
929 if (!success)
930 io_unlink(pair->dest_name, &pair->dest_st);
931
932 free(pair->dest_name);
933
934 return false;
935 }
936
937
938 extern void
io_close(file_pair * pair,bool success)939 io_close(file_pair *pair, bool success)
940 {
941 // Take care of sparseness at the end of the output file.
942 if (success && pair->dest_try_sparse
943 && pair->dest_pending_sparse > 0) {
944 // Seek forward one byte less than the size of the pending
945 // hole, then write one zero-byte. This way the file grows
946 // to its correct size. An alternative would be to use
947 // ftruncate() but that isn't portable enough (e.g. it
948 // doesn't work with FAT on Linux; FAT isn't that important
949 // since it doesn't support sparse files anyway, but we don't
950 // want to create corrupt files on it).
951 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
952 SEEK_CUR) == -1) {
953 message_error(_("%s: Seeking failed when trying "
954 "to create a sparse file: %s"),
955 pair->dest_name, strerror(errno));
956 success = false;
957 } else {
958 const uint8_t zero[1] = { '\0' };
959 if (io_write_buf(pair, zero, 1))
960 success = false;
961 }
962 }
963
964 signals_block();
965
966 // Copy the file attributes. We need to skip this if destination
967 // file isn't open or it is standard output.
968 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
969 io_copy_attrs(pair);
970
971 // Close the destination first. If it fails, we must not remove
972 // the source file!
973 if (io_close_dest(pair, success))
974 success = false;
975
976 // Close the source file, and unlink it if the operation using this
977 // file pair was successful and we haven't requested to keep the
978 // source file.
979 io_close_src(pair, success);
980
981 signals_unblock();
982
983 return;
984 }
985
986
987 extern void
io_fix_src_pos(file_pair * pair,size_t rewind_size)988 io_fix_src_pos(file_pair *pair, size_t rewind_size)
989 {
990 assert(rewind_size <= IO_BUFFER_SIZE);
991
992 if (rewind_size > 0) {
993 // This doesn't need to work on unseekable file descriptors,
994 // so just ignore possible errors.
995 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
996 }
997
998 return;
999 }
1000
1001
1002 extern size_t
io_read(file_pair * pair,io_buf * buf_union,size_t size)1003 io_read(file_pair *pair, io_buf *buf_union, size_t size)
1004 {
1005 // We use small buffers here.
1006 assert(size < SSIZE_MAX);
1007
1008 uint8_t *buf = buf_union->u8;
1009 size_t left = size;
1010
1011 while (left > 0) {
1012 const ssize_t amount = read(pair->src_fd, buf, left);
1013
1014 if (amount == 0) {
1015 pair->src_eof = true;
1016 break;
1017 }
1018
1019 if (amount == -1) {
1020 if (errno == EINTR) {
1021 if (user_abort)
1022 return SIZE_MAX;
1023
1024 continue;
1025 }
1026
1027 #ifndef TUKLIB_DOSLIKE
1028 if (errno == EAGAIN || errno == EWOULDBLOCK) {
1029 const io_wait_ret ret = io_wait(pair,
1030 mytime_get_flush_timeout(),
1031 true);
1032 switch (ret) {
1033 case IO_WAIT_MORE:
1034 continue;
1035
1036 case IO_WAIT_ERROR:
1037 return SIZE_MAX;
1038
1039 case IO_WAIT_TIMEOUT:
1040 return size - left;
1041
1042 default:
1043 message_bug();
1044 }
1045 }
1046 #endif
1047
1048 message_error(_("%s: Read error: %s"),
1049 pair->src_name, strerror(errno));
1050
1051 return SIZE_MAX;
1052 }
1053
1054 buf += (size_t)(amount);
1055 left -= (size_t)(amount);
1056 }
1057
1058 return size - left;
1059 }
1060
1061
1062 extern bool
io_pread(file_pair * pair,io_buf * buf,size_t size,off_t pos)1063 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
1064 {
1065 // Using lseek() and read() is more portable than pread() and
1066 // for us it is as good as real pread().
1067 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
1068 message_error(_("%s: Error seeking the file: %s"),
1069 pair->src_name, strerror(errno));
1070 return true;
1071 }
1072
1073 const size_t amount = io_read(pair, buf, size);
1074 if (amount == SIZE_MAX)
1075 return true;
1076
1077 if (amount != size) {
1078 message_error(_("%s: Unexpected end of file"),
1079 pair->src_name);
1080 return true;
1081 }
1082
1083 return false;
1084 }
1085
1086
1087 static bool
is_sparse(const io_buf * buf)1088 is_sparse(const io_buf *buf)
1089 {
1090 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1091
1092 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1093 if (buf->u64[i] != 0)
1094 return false;
1095
1096 return true;
1097 }
1098
1099
1100 static bool
io_write_buf(file_pair * pair,const uint8_t * buf,size_t size)1101 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1102 {
1103 assert(size < SSIZE_MAX);
1104
1105 while (size > 0) {
1106 const ssize_t amount = write(pair->dest_fd, buf, size);
1107 if (amount == -1) {
1108 if (errno == EINTR) {
1109 if (user_abort)
1110 return true;
1111
1112 continue;
1113 }
1114
1115 #ifndef TUKLIB_DOSLIKE
1116 if (errno == EAGAIN || errno == EWOULDBLOCK) {
1117 if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1118 continue;
1119
1120 return true;
1121 }
1122 #endif
1123
1124 // Handle broken pipe specially. gzip and bzip2
1125 // don't print anything on SIGPIPE. In addition,
1126 // gzip --quiet uses exit status 2 (warning) on
1127 // broken pipe instead of whatever raise(SIGPIPE)
1128 // would make it return. It is there to hide "Broken
1129 // pipe" message on some old shells (probably old
1130 // GNU bash).
1131 //
1132 // We don't do anything special with --quiet, which
1133 // is what bzip2 does too. If we get SIGPIPE, we
1134 // will handle it like other signals by setting
1135 // user_abort, and get EPIPE here.
1136 if (errno != EPIPE)
1137 message_error(_("%s: Write error: %s"),
1138 pair->dest_name, strerror(errno));
1139
1140 return true;
1141 }
1142
1143 buf += (size_t)(amount);
1144 size -= (size_t)(amount);
1145 }
1146
1147 return false;
1148 }
1149
1150
1151 extern bool
io_write(file_pair * pair,const io_buf * buf,size_t size)1152 io_write(file_pair *pair, const io_buf *buf, size_t size)
1153 {
1154 assert(size <= IO_BUFFER_SIZE);
1155
1156 if (pair->dest_try_sparse) {
1157 // Check if the block is sparse (contains only zeros). If it
1158 // sparse, we just store the amount and return. We will take
1159 // care of actually skipping over the hole when we hit the
1160 // next data block or close the file.
1161 //
1162 // Since io_close() requires that dest_pending_sparse > 0
1163 // if the file ends with sparse block, we must also return
1164 // if size == 0 to avoid doing the lseek().
1165 if (size == IO_BUFFER_SIZE) {
1166 if (is_sparse(buf)) {
1167 pair->dest_pending_sparse += size;
1168 return false;
1169 }
1170 } else if (size == 0) {
1171 return false;
1172 }
1173
1174 // This is not a sparse block. If we have a pending hole,
1175 // skip it now.
1176 if (pair->dest_pending_sparse > 0) {
1177 if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1178 SEEK_CUR) == -1) {
1179 message_error(_("%s: Seeking failed when "
1180 "trying to create a sparse "
1181 "file: %s"), pair->dest_name,
1182 strerror(errno));
1183 return true;
1184 }
1185
1186 pair->dest_pending_sparse = 0;
1187 }
1188 }
1189
1190 return io_write_buf(pair, buf->u8, size);
1191 }
1192