1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file file_io.c
6 /// \brief File opening, unlinking, and closing
7 //
8 // Author: Lasse Collin
9 //
10 // This file has been put into the public domain.
11 // You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include <fcntl.h>
18
19 #ifdef DOSLIKE
20 # include <io.h>
21 #endif
22
23 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
24 # include <sys/time.h>
25 #elif defined(HAVE_UTIME)
26 # include <utime.h>
27 #endif
28
29 #ifndef O_BINARY
30 # define O_BINARY 0
31 #endif
32
33 #ifndef O_NOCTTY
34 # define O_NOCTTY 0
35 #endif
36
37 #ifndef DOSLIKE
38 # include "open_stdxxx.h"
39 static bool warn_fchown;
40 #endif
41
42
43 extern void
44 io_init(void)
45 {
46 #ifndef DOSLIKE
47 // Make sure that stdin, stdout, and and stderr are connected to
48 // a valid file descriptor. Exit immediatelly with exit code ERROR
49 // if we cannot make the file descriptors valid. Maybe we should
50 // print an error message, but our stderr could be screwed anyway.
51 open_stdxxx(E_ERROR);
52
53 // If fchown() fails setting the owner, we warn about it only if
54 // we are root.
55 warn_fchown = geteuid() == 0;
56 #endif
57
58 #ifdef __DJGPP__
59 // Avoid doing useless things when statting files.
60 // This isn't important but doesn't hurt.
61 _djstat_flags = _STAT_INODE | _STAT_EXEC_EXT
62 | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
63 #endif
64
65 return;
66 }
67
68
/// \brief      Unlinks a file
///
/// Before removing the file, the device and inode numbers of \a name are
/// compared against \a known_st to check that it still is the file we
/// think it is. A small race between the check and unlink() remains, but
/// this is much better than nothing (the file could have been moved or
/// replaced even hours earlier).
///
/// \param      name        Name of the file to remove
/// \param      known_st    stat information recorded earlier for the file;
///                         ignored on DOS-like systems
static void
io_unlink(const char *name, const struct stat *known_st)
{
#ifdef DOSLIKE
    // st_ino is meaningless on Windows, so there is nothing to compare.
    // The cast only silences an unused-parameter warning.
    (void)known_st;
#else
    struct stat current_st;

    const bool same_file = lstat(name, &current_st) == 0
            && current_st.st_dev == known_st->st_dev
            && current_st.st_ino == known_st->st_ino;

    if (!same_file) {
        message_error(_("%s: File seems to be moved, not removing"),
                name);
        return;
    }
#endif

    // The file may still be replaced between lstat() and unlink(), but
    // at least we have tried to avoid removing the wrong file.
    if (unlink(name) != 0)
        message_error(_("%s: Cannot remove: %s"),
                name, strerror(errno));
}
100
101
102 /// \brief Copies owner/group and permissions
103 ///
104 /// \todo ACL and EA support
105 ///
106 static void
107 io_copy_attrs(const file_pair *pair)
memlimit_too_small(uint64_t memory_usage,uint64_t memory_limit)108 {
109 // Skip chown and chmod on Windows.
110 #ifndef DOSLIKE
111 // This function is more tricky than you may think at first.
112 // Blindly copying permissions may permit users to access the
113 // destination file who didn't have permission to access the
114 // source file.
115
116 // Try changing the owner of the file. If we aren't root or the owner
117 // isn't already us, fchown() probably doesn't succeed. We warn
118 // about failing fchown() only if we are root.
119 if (fchown(pair->dest_fd, pair->src_st.st_uid, -1) && warn_fchown)
120 message_warning(_("%s: Cannot set the file owner: %s"),
121 pair->dest_name, strerror(errno));
122
123 mode_t mode;
124
125 if (fchown(pair->dest_fd, -1, pair->src_st.st_gid)) {
126 message_warning(_("%s: Cannot set the file group: %s"),
127 pair->dest_name, strerror(errno));
128 // We can still safely copy some additional permissions:
129 // `group' must be at least as strict as `other' and
130 // also vice versa.
131 //
132 // NOTE: After this, the owner of the source file may
133 // get additional permissions. This shouldn't be too bad,
134 // because the owner would have had permission to chmod
135 // the original file anyway.
136 mode = ((pair->src_st.st_mode & 0070) >> 3)
137 & (pair->src_st.st_mode & 0007);
138 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
139 } else {
140 // Drop the setuid, setgid, and sticky bits.
141 mode = pair->src_st.st_mode & 0777;
142 }
143
144 if (fchmod(pair->dest_fd, mode))
145 message_warning(_("%s: Cannot set the file permissions: %s"),
146 pair->dest_name, strerror(errno));
147 #endif
148
149 // Copy the timestamps. We have several possible ways to do this, of
150 // which some are better in both security and precision.
151 //
152 // First, get the nanosecond part of the timestamps. As of writing,
153 // it's not standardized by POSIX, and there are several names for
154 // the same thing in struct stat.
155 long atime_nsec;
156 long mtime_nsec;
157
158 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
159 // GNU and Solaris
160 atime_nsec = pair->src_st.st_atim.tv_nsec;
161 mtime_nsec = pair->src_st.st_mtim.tv_nsec;
162
163 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
164 // BSD
165 atime_nsec = pair->src_st.st_atimespec.tv_nsec;
166 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
167
168 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
169 // GNU and BSD without extensions
170 atime_nsec = pair->src_st.st_atimensec;
171 mtime_nsec = pair->src_st.st_mtimensec;
172
173 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
174 // Tru64
175 atime_nsec = pair->src_st.st_uatime * 1000;
176 mtime_nsec = pair->src_st.st_umtime * 1000;
177
178 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
179 // UnixWare
180 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
181 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
182
183 # else
184 // Safe fallback
185 atime_nsec = 0;
186 mtime_nsec = 0;
187 # endif
188
189 // Construct a structure to hold the timestamps and call appropriate
190 // function to set the timestamps.
191 #if defined(HAVE_FUTIMENS)
192 // Use nanosecond precision.
193 struct timespec tv[2];
194 tv[0].tv_sec = pair->src_st.st_atime;
195 tv[0].tv_nsec = atime_nsec;
196 tv[1].tv_sec = pair->src_st.st_mtime;
197 tv[1].tv_nsec = mtime_nsec;
198
199 (void)futimens(pair->dest_fd, tv);
200
201 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
202 // Use microsecond precision.
203 struct timeval tv[2];
204 tv[0].tv_sec = pair->src_st.st_atime;
205 tv[0].tv_usec = atime_nsec / 1000;
206 tv[1].tv_sec = pair->src_st.st_mtime;
207 tv[1].tv_usec = mtime_nsec / 1000;
208
209 # if defined(HAVE_FUTIMES)
210 (void)futimes(pair->dest_fd, tv);
211 # elif defined(HAVE_FUTIMESAT)
212 (void)futimesat(pair->dest_fd, NULL, tv);
213 # else
214 // Argh, no function to use a file descriptor to set the timestamp.
215 (void)utimes(pair->dest_name, tv);
216 # endif
217
218 #elif defined(HAVE_UTIME)
219 // Use one-second precision. utime() doesn't support using file
220 // descriptor either. Some systems have broken utime() prototype
221 // so don't make this const.
222 struct utimbuf buf = {
223 .actime = pair->src_st.st_atime,
224 .modtime = pair->src_st.st_mtime,
225 };
226
227 // Avoid warnings.
228 (void)atime_nsec;
229 (void)mtime_nsec;
230
231 (void)utime(pair->dest_name, &buf);
232 #endif
233
234 return;
235 }
236
237
238 /// Opens the source file. Returns false on success, true on error.
239 static bool
240 io_open_src(file_pair *pair)
241 {
242 // There's nothing to open when reading from stdin.
243 if (pair->src_name == stdin_filename) {
244 pair->src_fd = STDIN_FILENO;
245 #ifdef DOSLIKE
246 setmode(STDIN_FILENO, O_BINARY);
247 #endif
248 return false;
249 }
250
251 // We accept only regular files if we are writing the output
252 // to disk too, and if --force was not given.
253 const bool reg_files_only = !opt_stdout && !opt_force;
254
255 // Flags for open()
256 int flags = O_RDONLY | O_BINARY | O_NOCTTY;
257
258 #ifndef DOSLIKE
259 // If we accept only regular files, we need to be careful to avoid
260 // problems with special files like devices and FIFOs. O_NONBLOCK
261 // prevents blocking when opening such files. When we want to accept
262 // special files, we must not use O_NONBLOCK, or otherwise we won't
263 // block waiting e.g. FIFOs to become readable.
264 if (reg_files_only)
265 flags |= O_NONBLOCK;
266 #endif
267
268 #if defined(O_NOFOLLOW)
269 if (reg_files_only)
270 flags |= O_NOFOLLOW;
271 #elif !defined(DOSLIKE)
272 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
273 // by POSIX). Check for symlinks with a separate lstat() on
274 // these systems.
275 if (reg_files_only) {
276 struct stat st;
277 if (lstat(pair->src_name, &st)) {
is_format_xz(void)278 message_error("%s: %s", pair->src_name,
279 strerror(errno));
280 return true;
281
282 } else if (S_ISLNK(st.st_mode)) {
283 message_warning(_("%s: Is a symbolic link, "
284 "skipping"), pair->src_name);
285 return true;
is_format_lzma(void)286 }
287 }
288 #endif
289
290 // Try to open the file. If we are accepting non-regular files,
291 // unblock the caught signals so that open() can be interrupted
292 // if it blocks e.g. due to a FIFO file.
293 if (!reg_files_only)
294 signals_unblock();
295
296 // Maybe this wouldn't need a loop, since all the signal handlers for
297 // which we don't use SA_RESTART set user_abort to true. But it
298 // doesn't hurt to have it just in case.
299 do {
300 pair->src_fd = open(pair->src_name, flags);
301 } while (pair->src_fd == -1 && errno == EINTR && !user_abort);
302
303 if (!reg_files_only)
304 signals_block();
305
306 if (pair->src_fd == -1) {
307 // If we were interrupted, don't display any error message.
308 if (errno == EINTR) {
309 // All the signals that don't have SA_RESTART
310 // set user_abort.
311 assert(user_abort);
312 return true;
313 }
314
315 #ifdef O_NOFOLLOW
316 // Give an understandable error message in if reason
317 // for failing was that the file was a symbolic link.
318 //
319 // Note that at least Linux, OpenBSD, Solaris, and Darwin
320 // use ELOOP to indicate if O_NOFOLLOW was the reason
321 // that open() failed. Because there may be
322 // directories in the pathname, ELOOP may occur also
323 // because of a symlink loop in the directory part.
324 // So ELOOP doesn't tell us what actually went wrong.
325 //
326 // FreeBSD associates EMLINK with O_NOFOLLOW and
327 // Tru64 uses ENOTSUP. We use these directly here
328 // and skip the lstat() call and the associated race.
329 // I want to hear if there are other kernels that
330 // fail with something else than ELOOP with O_NOFOLLOW.
331 bool was_symlink = false;
332
333 # if defined(__FreeBSD__) || defined(__DragonFly__)
334 if (errno == EMLINK)
335 was_symlink = true;
336
337 # elif defined(__digital__) && defined(__unix__)
coder_init(file_pair * pair)338 if (errno == ENOTSUP)
339 was_symlink = true;
340
341 # elif defined(__NetBSD__)
342 // FIXME? As of 2008-11-20, NetBSD doesn't document what
343 // errno is used with O_NOFOLLOW. It seems to be EFTYPE,
344 // but since it isn't documented, it may be wrong to rely
345 // on it here.
346 if (errno == EFTYPE)
347 was_symlink = true;
348
349 # else
350 if (errno == ELOOP && reg_files_only) {
351 const int saved_errno = errno;
352 struct stat st;
353 if (lstat(pair->src_name, &st) == 0
354 && S_ISLNK(st.st_mode))
355 was_symlink = true;
356
357 errno = saved_errno;
358 }
359 # endif
360
361 if (was_symlink)
362 message_warning(_("%s: Is a symbolic link, "
363 "skipping"), pair->src_name);
364 else
365 #endif
366 // Something else than O_NOFOLLOW failing
367 // (assuming that the race conditions didn't
368 // confuse us).
369 message_error("%s: %s", pair->src_name,
370 strerror(errno));
371
372 return true;
373 }
374
375 #ifndef DOSLIKE
376 // Drop O_NONBLOCK, which is used only when we are accepting only
377 // regular files. After the open() call, we want things to block
378 // instead of giving EAGAIN.
379 if (reg_files_only) {
380 flags = fcntl(pair->src_fd, F_GETFL);
381 if (flags == -1)
382 goto error_msg;
383
384 flags &= ~O_NONBLOCK;
385
386 if (fcntl(pair->src_fd, F_SETFL, flags))
387 goto error_msg;
388 }
389 #endif
390
391 // Stat the source file. We need the result also when we copy
392 // the permissions, and when unlinking.
393 if (fstat(pair->src_fd, &pair->src_st))
394 goto error_msg;
395
396 if (S_ISDIR(pair->src_st.st_mode)) {
397 message_warning(_("%s: Is a directory, skipping"),
398 pair->src_name);
399 goto error;
400 }
401
402 if (reg_files_only) {
403 if (!S_ISREG(pair->src_st.st_mode)) {
404 message_warning(_("%s: Not a regular file, "
405 "skipping"), pair->src_name);
406 goto error;
407 }
408
409 // These are meaningless on Windows.
410 #ifndef DOSLIKE
411 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
412 // gzip rejects setuid and setgid files even
413 // when --force was used. bzip2 doesn't check
414 // for them, but calls fchown() after fchmod(),
415 // and many systems automatically drop setuid
416 // and setgid bits there.
417 //
418 // We accept setuid and setgid files if
419 // --force was used. We drop these bits
420 // explicitly in io_copy_attr().
421 message_warning(_("%s: File has setuid or "
422 "setgid bit set, skipping"),
423 pair->src_name);
424 goto error;
425 }
426
427 if (pair->src_st.st_mode & S_ISVTX) {
428 message_warning(_("%s: File has sticky bit "
429 "set, skipping"),
430 pair->src_name);
431 goto error;
432 }
433
434 if (pair->src_st.st_nlink > 1) {
coder_normal(file_pair * pair)435 message_warning(_("%s: Input file has more "
436 "than one hard link, "
437 "skipping"), pair->src_name);
438 goto error;
439 }
440 #endif
441 }
442
443 return false;
444
445 error_msg:
446 message_error("%s: %s", pair->src_name, strerror(errno));
447 error:
448 (void)close(pair->src_fd);
449 return true;
450 }
451
452
453 /// \brief Closes source file of the file_pair structure
454 ///
455 /// \param pair File whose src_fd should be closed
456 /// \param success If true, the file will be removed from the disk if
457 /// closing succeeds and --keep hasn't been used.
458 static void
459 io_close_src(file_pair *pair, bool success)
460 {
461 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
462 #ifdef DOSLIKE
463 (void)close(pair->src_fd);
464 #endif
465
466 // If we are going to unlink(), do it before closing the file.
467 // This way there's no risk that someone replaces the file and
468 // happens to get same inode number, which would make us
469 // unlink() wrong file.
470 //
471 // NOTE: DOS-like systems are an exception to this, because
472 // they don't allow unlinking files that are open. *sigh*
473 if (success && !opt_keep_original)
474 io_unlink(pair->src_name, &pair->src_st);
475
476 #ifndef DOSLIKE
477 (void)close(pair->src_fd);
478 #endif
479 }
480
481 return;
482 }
483
484
485 static bool
486 io_open_dest(file_pair *pair)
487 {
488 if (opt_stdout || pair->src_fd == STDIN_FILENO) {
489 // We don't modify or free() this.
490 pair->dest_name = (char *)"(stdout)";
491 pair->dest_fd = STDOUT_FILENO;
492 #ifdef DOSLIKE
493 setmode(STDOUT_FILENO, O_BINARY);
494 #endif
495 return false;
496 }
497
498 pair->dest_name = suffix_get_dest_name(pair->src_name);
499 if (pair->dest_name == NULL)
500 return true;
501
502 // If --force was used, unlink the target file first.
503 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
504 message_error("%s: Cannot unlink: %s",
505 pair->dest_name, strerror(errno));
506 free(pair->dest_name);
507 return true;
508 }
509
510 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
511 message_error("%s: Cannot unlink: %s", pair->dest_name,
512 strerror(errno));
513 free(pair->dest_name);
514 return true;
515 }
516
517 // Open the file.
518 const int flags = O_WRONLY | O_BINARY | O_NOCTTY | O_CREAT | O_EXCL;
519 const mode_t mode = S_IRUSR | S_IWUSR;
520 pair->dest_fd = open(pair->dest_name, flags, mode);
521
522 if (pair->dest_fd == -1) {
523 // Don't bother with error message if user requested
524 // us to exit anyway.
525 if (!user_abort)
526 message_error("%s: %s", pair->dest_name,
527 strerror(errno));
528
529 free(pair->dest_name);
530 return true;
531 }
532
533 // If this really fails... well, we have a safe fallback.
534 if (fstat(pair->dest_fd, &pair->dest_st)) {
535 pair->dest_st.st_dev = 0;
536 pair->dest_st.st_ino = 0;
537 }
538
539 return false;
540 }
541
542
543 /// \brief Closes destination file of the file_pair structure
544 ///
545 /// \param pair File whose dest_fd should be closed
546 /// \param success If false, the file will be removed from the disk.
547 ///
548 /// \return Zero if closing succeeds. On error, -1 is returned and
549 /// error message printed.
550 static int
551 io_close_dest(file_pair *pair, bool success)
552 {
553 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
554 return 0;
555
556 if (close(pair->dest_fd)) {
557 message_error(_("%s: Closing the file failed: %s"),
558 pair->dest_name, strerror(errno));
559
560 // Closing destination file failed, so we cannot trust its
561 // contents. Get rid of junk:
562 io_unlink(pair->dest_name, &pair->dest_st);
563 free(pair->dest_name);
564 return -1;
565 }
566
567 // If the operation using this file wasn't successful, we git rid
568 // of the junk file.
569 if (!success)
570 io_unlink(pair->dest_name, &pair->dest_st);
571
572 free(pair->dest_name);
573
574 return 0;
575 }
576
577
coder_passthru(file_pair * pair)578 extern file_pair *
579 io_open(const char *src_name)
580 {
581 if (is_empty_filename(src_name))
582 return NULL;
583
584 // Since we have only one file open at a time, we can use
585 // a statically allocated structure.
586 static file_pair pair;
587
588 pair = (file_pair){
589 .src_name = src_name,
590 .dest_name = NULL,
591 .src_fd = -1,
592 .dest_fd = -1,
593 .src_eof = false,
594 };
595
596 // Block the signals, for which we have a custom signal handler, so
597 // that we don't need to worry about EINTR.
598 signals_block();
599
600 file_pair *ret = NULL;
601 if (!io_open_src(&pair)) {
602 // io_open_src() may have unblocked the signals temporarily,
603 // and thus user_abort may have got set even if open()
604 // succeeded.
605 if (user_abort || io_open_dest(&pair))
606 io_close_src(&pair, false);
607 else
608 ret = &pair;
609 }
610
611 signals_unblock();
612
613 return ret;
614 }
615
616
617 extern void
618 io_close(file_pair *pair, bool success)
619 {
620 signals_block();
621
622 if (success && pair->dest_fd != STDOUT_FILENO)
623 io_copy_attrs(pair);
624
625 // Close the destination first. If it fails, we must not remove
626 // the source file!
627 if (io_close_dest(pair, success))
628 success = false;
629
630 // Close the source file, and unlink it if the operation using this
631 // file pair was successful and we haven't requested to keep the
632 // source file.
633 io_close_src(pair, success);
634
635 signals_unblock();
636
637 return;
638 }
639
640
641 extern size_t
642 io_read(file_pair *pair, uint8_t *buf, size_t size)
643 {
644 // We use small buffers here.
645 assert(size < SSIZE_MAX);
646
647 size_t left = size;
648
649 while (left > 0) {
650 const ssize_t amount = read(pair->src_fd, buf, left);
651
652 if (amount == 0) {
653 pair->src_eof = true;
654 break;
655 }
656
657 if (amount == -1) {
658 if (errno == EINTR) {
659 if (user_abort)
660 return SIZE_MAX;
661
662 continue;
663 }
664
665 message_error(_("%s: Read error: %s"),
666 pair->src_name, strerror(errno));
667
668 // FIXME Is this needed?
669 pair->src_eof = true;
670
671 return SIZE_MAX;
672 }
673
674 buf += (size_t)(amount);
675 left -= (size_t)(amount);
676 }
677
678 return size - left;
679 }
680
681
682 extern bool
683 io_write(const file_pair *pair, const uint8_t *buf, size_t size)
684 {
685 assert(size < SSIZE_MAX);
686
687 while (size > 0) {
688 const ssize_t amount = write(pair->dest_fd, buf, size);
689 if (amount == -1) {
690 if (errno == EINTR) {
691 if (user_abort)
692 return -1;
693
694 continue;
695 }
696
697 // Handle broken pipe specially. gzip and bzip2
698 // don't print anything on SIGPIPE. In addition,
699 // gzip --quiet uses exit status 2 (warning) on
700 // broken pipe instead of whatever raise(SIGPIPE)
701 // would make it return. It is there to hide "Broken
702 // pipe" message on some old shells (probably old
703 // GNU bash).
704 //
705 // We don't do anything special with --quiet, which
706 // is what bzip2 does too. If we get SIGPIPE, we
707 // will handle it like other signals by setting
708 // user_abort, and get EPIPE here.
709 if (errno != EPIPE)
710 message_error(_("%s: Write error: %s"),
711 pair->dest_name, strerror(errno));
712
713 return true;
714 }
715
716 buf += (size_t)(amount);
717 size -= (size_t)(amount);
718 }
719
720 return false;
721 }
722