xref: /openbsd/bin/pax/ar_subs.c (revision f6aab3d8)
1 /*	$OpenBSD: ar_subs.c,v 1.51 2023/07/10 16:28:33 jeremy Exp $	*/
2 /*	$NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1992 Keith Muller.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Keith Muller of the University of California, San Diego.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <signal.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <time.h>
45 #include <unistd.h>
46 
47 #include "pax.h"
48 #include "extern.h"
49 
50 static void wr_archive(ARCHD *, int is_app);
51 static int get_arc(void);
52 static int next_head(ARCHD *);
53 extern sigset_t s_mask;
54 
55 /*
56  * Routines which control the overall operation modes of pax as specified by
57  * the user: list, append, read ...
58  */
59 
60 static char hdbuf[BLKMULT];		/* space for archive header on read */
61 u_long flcnt;				/* number of files processed */
62 
63 /*
64  * list()
65  *	list the contents of an archive which match user supplied pattern(s)
66  *	(no pattern matches all).
67  */
68 
69 void
70 list(void)
71 {
72 	ARCHD *arcn;
73 	int res;
74 	ARCHD archd;
75 	time_t now;
76 
77 	arcn = &archd;
78 	/*
79 	 * figure out archive type; pass any format specific options to the
80 	 * archive option processing routine; call the format init routine. We
81 	 * also save current time for ls_list() so we do not make a system
82 	 * call for each file we need to print. If verbose (vflag) start up
83 	 * the name and group caches.
84 	 */
85 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
86 	    ((*frmt->st_rd)() < 0))
87 		return;
88 
89 	now = time(NULL);
90 
91 	/*
92 	 * step through the archive until the format says it is done
93 	 */
94 	while (next_head(arcn) == 0) {
95 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
96 			/*
97 			 * we need to read, to get the real filename
98 			 */
99 			off_t cnt;
100 			if (!rd_wrfile(arcn, arcn->type == PAX_GLF
101 			    ? -1 : -2, &cnt))
102 				(void)rd_skip(cnt + arcn->pad);
103 			continue;
104 		}
105 
106 		/*
107 		 * check for pattern, and user specified options match.
108 		 * When all patterns are matched we are done.
109 		 */
110 		if ((res = pat_match(arcn)) < 0)
111 			break;
112 
113 		if ((res == 0) && (sel_chk(arcn) == 0)) {
114 			/*
115 			 * pattern resulted in a selected file
116 			 */
117 			if (pat_sel(arcn) < 0)
118 				break;
119 
120 			/*
121 			 * modify the name as requested by the user if name
122 			 * survives modification, do a listing of the file
123 			 */
124 			if ((res = mod_name(arcn)) < 0)
125 				break;
126 			if (res == 0)
127 				ls_list(arcn, now, stdout);
128 		}
129 
130 		/*
131 		 * skip to next archive format header using values calculated
132 		 * by the format header read routine
133 		 */
134 		if (rd_skip(arcn->skip + arcn->pad) == 1)
135 			break;
136 	}
137 
138 	/*
139 	 * all done, let format have a chance to cleanup, and make sure that
140 	 * the patterns supplied by the user were all matched
141 	 */
142 	(void)(*frmt->end_rd)();
143 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
144 	ar_close(0);
145 	pat_chk();
146 }
147 
148 static int
149 cmp_file_times(int mtime_flag, int ctime_flag, ARCHD *arcn, struct stat *sbp)
150 {
151 	struct stat sb;
152 
153 	if (sbp == NULL) {
154 		if (lstat(arcn->name, &sb) != 0)
155 			return (0);
156 		sbp = &sb;
157 	}
158 
159 	if (ctime_flag && mtime_flag)
160 		return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=) &&
161 		        timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
162 	else if (ctime_flag)
163 		return (timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
164 	else
165 		return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=));
166 }
167 
168 /*
169  * extract()
170  *	extract the member(s) of an archive as specified by user supplied
171  *	pattern(s) (no patterns extracts all members)
172  */
173 
174 void
175 extract(void)
176 {
177 	ARCHD *arcn;
178 	int res;
179 	off_t cnt;
180 	ARCHD archd;
181 	int fd;
182 	time_t now;
183 
184 	sltab_start();
185 
186 	arcn = &archd;
187 	/*
188 	 * figure out archive type; pass any format specific options to the
189 	 * archive option processing routine; call the format init routine;
190 	 * start up the directory modification time and access mode database
191 	 */
192 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
193 	    ((*frmt->st_rd)() < 0) || (dir_start() < 0))
194 		return;
195 
196 	/*
197 	 * When we are doing interactive rename, we store the mapping of names
198 	 * so we can fix up hard links files later in the archive.
199 	 */
200 	if (iflag && (name_start() < 0))
201 		return;
202 
203 	now = time(NULL);
204 
205 	/*
206 	 * step through each entry on the archive until the format read routine
207 	 * says it is done
208 	 */
209 	while (next_head(arcn) == 0) {
210 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
211 			/*
212 			 * we need to read, to get the real filename
213 			 */
214 			if (!rd_wrfile(arcn, arcn->type == PAX_GLF
215 			    ? -1 : -2, &cnt))
216 				(void)rd_skip(cnt + arcn->pad);
217 			continue;
218 		}
219 
220 		/*
221 		 * check for pattern, and user specified options match. When
222 		 * all the patterns are matched we are done
223 		 */
224 		if ((res = pat_match(arcn)) < 0)
225 			break;
226 
227 		if ((res > 0) || (sel_chk(arcn) != 0)) {
228 			/*
229 			 * file is not selected. skip past any file data and
230 			 * padding and go back for the next archive member
231 			 */
232 			(void)rd_skip(arcn->skip + arcn->pad);
233 			continue;
234 		}
235 
236 		/*
237 		 * with -u or -D only extract when the archive member is newer
238 		 * than the file with the same name in the file system (no
239 		 * test of being the same type is required).
240 		 * NOTE: this test is done BEFORE name modifications as
241 		 * specified by pax. this operation can be confusing to the
242 		 * user who might expect the test to be done on an existing
243 		 * file AFTER the name mod. In honesty the pax spec is probably
244 		 * flawed in this respect.
245 		 */
246 		if ((uflag || Dflag) &&
247 		    cmp_file_times(uflag, Dflag, arcn, NULL)) {
248 			(void)rd_skip(arcn->skip + arcn->pad);
249 			continue;
250 		}
251 
252 		/*
253 		 * this archive member is now been selected. modify the name.
254 		 */
255 		if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0))
256 			break;
257 		if (res > 0) {
258 			/*
259 			 * a bad name mod, skip and purge name from link table
260 			 */
261 			purg_lnk(arcn);
262 			(void)rd_skip(arcn->skip + arcn->pad);
263 			continue;
264 		}
265 
266 		/*
267 		 * Non standard -Y and -Z flag. When the existing file is
268 		 * same age or newer skip
269 		 */
270 		if ((Yflag || Zflag) &&
271 		    cmp_file_times(Yflag, Zflag, arcn, NULL)) {
272 			(void)rd_skip(arcn->skip + arcn->pad);
273 			continue;
274 		}
275 
276 		if (vflag) {
277 			if (vflag > 1)
278 				ls_list(arcn, now, listf);
279 			else {
280 				(void)safe_print(arcn->name, listf);
281 				vfpart = 1;
282 			}
283 		}
284 
285 		/*
286 		 * if required, chdir around.
287 		 */
288 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
289 			if (chdir(arcn->pat->chdname) != 0)
290 				syswarn(1, errno, "Cannot chdir to %s",
291 				    arcn->pat->chdname);
292 
293 		/*
294 		 * all ok, extract this member based on type
295 		 */
296 		if (!PAX_IS_REG(arcn->type)) {
297 			/*
298 			 * process archive members that are not regular files.
299 			 * throw out padding and any data that might follow the
300 			 * header (as determined by the format).
301 			 */
302 			if (PAX_IS_HARDLINK(arcn->type))
303 				res = lnk_creat(arcn);
304 			else
305 				res = node_creat(arcn);
306 
307 			(void)rd_skip(arcn->skip + arcn->pad);
308 			if (res < 0)
309 				purg_lnk(arcn);
310 
311 			if (vflag && vfpart) {
312 				(void)putc('\n', listf);
313 				vfpart = 0;
314 			}
315 			goto popd;
316 		}
317 		/*
318 		 * we have a file with data here. If we can not create it, skip
319 		 * over the data and purge the name from hard link table
320 		 */
321 		if ((fd = file_creat(arcn)) < 0) {
322 			(void)rd_skip(arcn->skip + arcn->pad);
323 			purg_lnk(arcn);
324 			goto popd;
325 		}
326 		/*
327 		 * extract the file from the archive and skip over padding and
328 		 * any unprocessed data
329 		 */
330 		res = rd_wrfile(arcn, fd, &cnt);
331 		file_close(arcn, fd);
332 		if (vflag && vfpart) {
333 			(void)putc('\n', listf);
334 			vfpart = 0;
335 		}
336 		if (!res)
337 			(void)rd_skip(cnt + arcn->pad);
338 
339 popd:
340 		/*
341 		 * if required, chdir around.
342 		 */
343 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
344 			if (fchdir(cwdfd) != 0)
345 				syswarn(1, errno,
346 				    "Can't fchdir to starting directory");
347 	}
348 
349 	/*
350 	 * all done, restore directory modes and times as required; make sure
351 	 * all patterns supplied by the user were matched; block off signals
352 	 * to avoid chance for multiple entry into the cleanup code.
353 	 */
354 	(void)(*frmt->end_rd)();
355 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
356 	ar_close(0);
357 	sltab_process(0);
358 	proc_dir(0);
359 	pat_chk();
360 }
361 
362 /*
363  * wr_archive()
364  *	Write an archive. used in both creating a new archive and appends on
365  *	previously written archive.
366  */
367 
368 static void
369 wr_archive(ARCHD *arcn, int is_app)
370 {
371 	int res;
372 	int hlk;
373 	int wr_one;
374 	off_t cnt;
375 	int (*wrf)(ARCHD *);
376 	int fd = -1;
377 	time_t now;
378 
379 	/*
380 	 * if this format supports hard link storage, start up the database
381 	 * that detects them.
382 	 */
383 	if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0))
384 		return;
385 
386 	/*
387 	 * if this is not append, and there are no files, we do not write a
388 	 * trailer
389 	 */
390 	wr_one = is_app;
391 
392 	/*
393 	 * start up the file traversal code and format specific write
394 	 */
395 	if (ftree_start() < 0) {
396 		if (is_app)
397 			goto trailer;
398 		return;
399 	} else if (((*frmt->st_wr)() < 0))
400 		return;
401 
402 	wrf = frmt->wr;
403 
404 	/*
405 	 * When we are doing interactive rename, we store the mapping of names
406 	 * so we can fix up hard links files later in the archive.
407 	 */
408 	if (iflag && (name_start() < 0))
409 		return;
410 
411 	now = time(NULL);
412 
413 	/*
414 	 * while there are files to archive, process them one at at time
415 	 */
416 	while (next_file(arcn) == 0) {
417 		/*
418 		 * check if this file meets user specified options match.
419 		 */
420 		if (sel_chk(arcn) != 0)
421 			continue;
422 		fd = -1;
423 		if (uflag) {
424 			/*
425 			 * only archive if this file is newer than a file with
426 			 * the same name that is already stored on the archive
427 			 */
428 			if ((res = chk_ftime(arcn)) < 0)
429 				break;
430 			if (res > 0) {
431 				ftree_skipped_newer(arcn);
432 				continue;
433 			}
434 		}
435 
436 		/*
437 		 * this file is considered selected now. see if this is a hard
438 		 * link to a file already stored
439 		 */
440 		ftree_sel(arcn);
441 		if (hlk && (chk_lnk(arcn) < 0))
442 			break;
443 
444 		/*
445 		 * Modify the name as requested by the user
446 		 */
447 		if ((res = mod_name(arcn)) < 0) {
448 			/*
449 			 * pax finished, purge link table entry and stop
450 			 */
451 			purg_lnk(arcn);
452 			break;
453 		} else if (res > 0) {
454 			/*
455 			 * skipping file, purge link table entry
456 			 */
457 			purg_lnk(arcn);
458 			continue;
459 		}
460 
461 		if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) {
462 			/*
463 			 * we will have to read this file. by opening it now we
464 			 * can avoid writing a header to the archive for a file
465 			 * we were later unable to read (we also purge it from
466 			 * the link table).
467 			 */
468 			if ((fd = open(arcn->org_name, O_RDONLY)) < 0) {
469 				syswarn(1,errno, "Unable to open %s to read",
470 					arcn->org_name);
471 				purg_lnk(arcn);
472 				continue;
473 			}
474 		}
475 
476 		if (docrc && (set_crc(arcn, fd) < 0)) {
477 			/*
478 			 * unable to obtain the crc we need, close the file,
479 			 * purge link table entry
480 			 */
481 			rdfile_close(arcn, &fd);
482 			purg_lnk(arcn);
483 			continue;
484 		}
485 
486 		if (vflag) {
487 			if (vflag > 1)
488 				ls_list(arcn, now, listf);
489 			else {
490 				(void)safe_print(arcn->name, listf);
491 				vfpart = 1;
492 			}
493 		}
494 		++flcnt;
495 
496 		/*
497 		 * looks safe to store the file, have the format specific
498 		 * routine write routine store the file header on the archive
499 		 */
500 		if ((res = (*wrf)(arcn)) < 0) {
501 			rdfile_close(arcn, &fd);
502 			break;
503 		}
504 		wr_one = 1;
505 		if (res > 0) {
506 			/*
507 			 * format write says no file data needs to be stored
508 			 * so we are done messing with this file
509 			 */
510 			if (vflag && vfpart) {
511 				(void)putc('\n', listf);
512 				vfpart = 0;
513 			}
514 			rdfile_close(arcn, &fd);
515 			continue;
516 		}
517 
518 		/*
519 		 * Add file data to the archive, quit on write error. if we
520 		 * cannot write the entire file contents to the archive we
521 		 * must pad the archive to replace the missing file data
522 		 * (otherwise during an extract the file header for the file
523 		 * which FOLLOWS this one will not be where we expect it to
524 		 * be).
525 		 */
526 		res = wr_rdfile(arcn, fd, &cnt);
527 		rdfile_close(arcn, &fd);
528 		if (vflag && vfpart) {
529 			(void)putc('\n', listf);
530 			vfpart = 0;
531 		}
532 		if (res < 0)
533 			break;
534 
535 		/*
536 		 * pad as required, cnt is number of bytes not written
537 		 */
538 		if (((cnt > 0) && (wr_skip(cnt) < 0)) ||
539 		    ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0)))
540 			break;
541 	}
542 
543 trailer:
544 	/*
545 	 * tell format to write trailer; pad to block boundary; reset directory
546 	 * mode/access times, and check if all patterns supplied by the user
547 	 * were matched. block off signals to avoid chance for multiple entry
548 	 * into the cleanup code
549 	 */
550 	if (wr_one) {
551 		(*frmt->end_wr)();
552 		wr_fin();
553 	}
554 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
555 	ar_close(0);
556 	if (tflag)
557 		proc_dir(0);
558 	ftree_chk();
559 }
560 
561 /*
562  * append()
563  *	Add file to previously written archive. Archive format specified by the
564  *	user must agree with archive. The archive is read first to collect
565  *	modification times (if -u) and locate the archive trailer. The archive
566  *	is positioned in front of the record with the trailer and wr_archive()
567  *	is called to add the new members.
568  *	PAX IMPLEMENTATION DETAIL NOTE:
569  *	-u is implemented by adding the new members to the end of the archive.
570  *	Care is taken so that these do not end up as links to the older
571  *	version of the same file already stored in the archive. It is expected
572  *	when extraction occurs these newer versions will over-write the older
573  *	ones stored "earlier" in the archive (this may be a bad assumption as
574  *	it depends on the implementation of the program doing the extraction).
575  *	It is really difficult to splice in members without either re-writing
576  *	the entire archive (from the point were the old version was), or having
577  *	assistance of the format specification in terms of a special update
578  *	header that invalidates a previous archive record. The posix spec left
579  *	the method used to implement -u unspecified. This pax is able to
580  *	over write existing files that it creates.
581  */
582 
583 void
584 append(void)
585 {
586 	ARCHD *arcn;
587 	int res;
588 	ARCHD archd;
589 	FSUB *orgfrmt;
590 	int udev;
591 	off_t tlen;
592 
593 	arcn = &archd;
594 	orgfrmt = frmt;
595 
596 	/*
597 	 * Do not allow an append operation if the actual archive is of a
598 	 * different format than the user specified format.
599 	 */
600 	if (get_arc() < 0)
601 		return;
602 	if ((orgfrmt != NULL) && (orgfrmt != frmt)) {
603 		paxwarn(1, "Cannot mix current archive format %s with %s",
604 		    frmt->name, orgfrmt->name);
605 		return;
606 	}
607 
608 	/*
609 	 * pass the format any options and start up format
610 	 */
611 	if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0))
612 		return;
613 
614 	/*
615 	 * if we only are adding members that are newer, we need to save the
616 	 * mod times for all files we see.
617 	 */
618 	if (uflag && (ftime_start() < 0))
619 		return;
620 
621 	/*
622 	 * some archive formats encode hard links by recording the device and
623 	 * file serial number (inode) but copy the file anyway (multiple times)
624 	 * to the archive. When we append, we run the risk that newly added
625 	 * files may have the same device and inode numbers as those recorded
626 	 * on the archive but during a previous run. If this happens, when the
627 	 * archive is extracted we get INCORRECT hard links. We avoid this by
628 	 * remapping the device numbers so that newly added files will never
629 	 * use the same device number as one found on the archive. remapping
630 	 * allows new members to safely have links among themselves. remapping
631 	 * also avoids problems with file inode (serial number) truncations
632 	 * when the inode number is larger than storage space in the archive
633 	 * header. See the remap routines for more details.
634 	 */
635 	if ((udev = frmt->udev) && (dev_start() < 0))
636 		return;
637 
638 	/*
639 	 * reading the archive may take a long time. If verbose tell the user
640 	 */
641 	if (vflag) {
642 		(void)fprintf(listf,
643 			"%s: Reading archive to position at the end...", argv0);
644 		vfpart = 1;
645 	}
646 
647 	/*
648 	 * step through the archive until the format says it is done
649 	 */
650 	while (next_head(arcn) == 0) {
651 		/*
652 		 * check if this file meets user specified options.
653 		 */
654 		if (sel_chk(arcn) != 0) {
655 			if (rd_skip(arcn->skip + arcn->pad) == 1)
656 				break;
657 			continue;
658 		}
659 
660 		if (uflag) {
661 			/*
662 			 * see if this is the newest version of this file has
663 			 * already been seen, if so skip.
664 			 */
665 			if ((res = chk_ftime(arcn)) < 0)
666 				break;
667 			if (res > 0) {
668 				if (rd_skip(arcn->skip + arcn->pad) == 1)
669 					break;
670 				continue;
671 			}
672 		}
673 
674 		/*
675 		 * Store this device number. Device numbers seen during the
676 		 * read phase of append will cause newly appended files with a
677 		 * device number seen in the old part of the archive to be
678 		 * remapped to an unused device number.
679 		 */
680 		if ((udev && (add_dev(arcn) < 0)) ||
681 		    (rd_skip(arcn->skip + arcn->pad) == 1))
682 			break;
683 	}
684 
685 	/*
686 	 * done, finish up read and get the number of bytes to back up so we
687 	 * can add new members. The format might have used the hard link table,
688 	 * purge it.
689 	 */
690 	tlen = (*frmt->end_rd)();
691 	lnk_end();
692 
693 	/*
694 	 * try to position for write, if this fails quit. if any error occurs,
695 	 * we will refuse to write
696 	 */
697 	if (appnd_start(tlen) < 0)
698 		return;
699 
700 	/*
701 	 * tell the user we are done reading.
702 	 */
703 	if (vflag && vfpart) {
704 		(void)fputs("done.\n", listf);
705 		vfpart = 0;
706 	}
707 
708 	/*
709 	 * go to the writing phase to add the new members
710 	 */
711 	wr_archive(arcn, 1);
712 }
713 
714 /*
715  * archive()
716  *	write a new archive
717  */
718 
719 void
720 archive(void)
721 {
722 	ARCHD archd;
723 
724 	/*
725 	 * if we only are adding members that are newer, we need to save the
726 	 * mod times for all files; set up for writing; pass the format any
727 	 * options write the archive
728 	 */
729 	if ((uflag && (ftime_start() < 0)) || (wr_start() < 0))
730 		return;
731 	if ((*frmt->options)() < 0)
732 		return;
733 
734 	wr_archive(&archd, 0);
735 }
736 
737 /*
738  * copy()
739  *	copy files from one part of the file system to another. this does not
740  *	use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an
741  *	archive was written and then extracted in the destination directory
742  *	(except the files are forced to be under the destination directory).
743  */
744 
745 void
746 copy(void)
747 {
748 	ARCHD *arcn;
749 	int res;
750 	int fddest;
751 	char *dest_pt;
752 	size_t dlen;
753 	size_t drem;
754 	int fdsrc = -1;
755 	struct stat sb;
756 	ARCHD archd;
757 	char dirbuf[PAXPATHLEN+1];
758 
759 	sltab_start();
760 
761 	arcn = &archd;
762 	/*
763 	 * set up the destination dir path and make sure it is a directory. We
764 	 * make sure we have a trailing / on the destination
765 	 */
766 	dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf));
767 	if (dlen >= sizeof(dirbuf) ||
768 	    (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) {
769 		paxwarn(1, "directory name is too long %s", dirptr);
770 		return;
771 	}
772 	dest_pt = dirbuf + dlen;
773 	if (*(dest_pt-1) != '/') {
774 		*dest_pt++ = '/';
775 		*dest_pt = '\0';
776 		++dlen;
777 	}
778 	drem = PAXPATHLEN - dlen;
779 
780 	if (stat(dirptr, &sb) == -1) {
781 		syswarn(1, errno, "Cannot access destination directory %s",
782 			dirptr);
783 		return;
784 	}
785 	if (!S_ISDIR(sb.st_mode)) {
786 		paxwarn(1, "Destination is not a directory %s", dirptr);
787 		return;
788 	}
789 
790 	/*
791 	 * start up the hard link table; file traversal routines and the
792 	 * modification time and access mode database
793 	 */
794 	if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0))
795 		return;
796 
797 	/*
798 	 * When we are doing interactive rename, we store the mapping of names
799 	 * so we can fix up hard links files later in the archive.
800 	 */
801 	if (iflag && (name_start() < 0))
802 		return;
803 
804 	/*
805 	 * set up to cp file trees
806 	 */
807 	cp_start();
808 
809 	/*
810 	 * while there are files to archive, process them
811 	 */
812 	while (next_file(arcn) == 0) {
813 		fdsrc = -1;
814 
815 		/*
816 		 * check if this file meets user specified options
817 		 */
818 		if (sel_chk(arcn) != 0)
819 			continue;
820 
821 		/*
822 		 * if there is already a file in the destination directory with
823 		 * the same name and it is newer, skip the one stored on the
824 		 * archive.
825 		 * NOTE: this test is done BEFORE name modifications as
826 		 * specified by pax. this can be confusing to the user who
827 		 * might expect the test to be done on an existing file AFTER
828 		 * the name mod. In honesty the pax spec is probably flawed in
829 		 * this respect
830 		 */
831 		if (uflag || Dflag) {
832 			/*
833 			 * create the destination name
834 			 */
835 			if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'),
836 			    drem + 1) > drem) {
837 				paxwarn(1, "Destination pathname too long %s",
838 					arcn->name);
839 				continue;
840 			}
841 
842 			/*
843 			 * if existing file is same age or newer skip
844 			 */
845 			res = lstat(dirbuf, &sb);
846 			*dest_pt = '\0';
847 
848 			if (res == 0) {
849 				ftree_skipped_newer(arcn);
850 				if (cmp_file_times(uflag, Dflag, arcn, &sb))
851 					continue;
852 			}
853 		}
854 
855 		/*
856 		 * this file is considered selected. See if this is a hard link
857 		 * to a previous file; modify the name as requested by the
858 		 * user; set the final destination.
859 		 */
860 		ftree_sel(arcn);
861 		if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0))
862 			break;
863 		if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) {
864 			/*
865 			 * skip file, purge from link table
866 			 */
867 			purg_lnk(arcn);
868 			continue;
869 		}
870 
871 		/*
872 		 * Non standard -Y and -Z flag. When the existing file is
873 		 * same age or newer skip
874 		 */
875 		if ((Yflag || Zflag) &&
876 		    cmp_file_times(Yflag, Zflag, arcn, NULL))
877 			continue;
878 
879 		if (vflag) {
880 			(void)safe_print(arcn->name, listf);
881 			vfpart = 1;
882 		}
883 		++flcnt;
884 
885 		/*
886 		 * try to create a hard link to the src file if requested
887 		 * but make sure we are not trying to overwrite ourselves.
888 		 */
889 		if (lflag)
890 			res = cross_lnk(arcn);
891 		else
892 			res = chk_same(arcn);
893 		if (res <= 0) {
894 			if (vflag && vfpart) {
895 				(void)putc('\n', listf);
896 				vfpart = 0;
897 			}
898 			continue;
899 		}
900 
901 		/*
902 		 * have to create a new file
903 		 */
904 		if (!PAX_IS_REG(arcn->type)) {
905 			/*
906 			 * create a link or special file
907 			 */
908 			if (PAX_IS_HARDLINK(arcn->type))
909 				res = lnk_creat(arcn);
910 			else
911 				res = node_creat(arcn);
912 			if (res < 0)
913 				purg_lnk(arcn);
914 			if (vflag && vfpart) {
915 				(void)putc('\n', listf);
916 				vfpart = 0;
917 			}
918 			continue;
919 		}
920 
921 		/*
922 		 * have to copy a regular file to the destination directory.
923 		 * first open source file and then create the destination file
924 		 */
925 		if ((fdsrc = open(arcn->org_name, O_RDONLY)) < 0) {
926 			syswarn(1, errno, "Unable to open %s to read",
927 			    arcn->org_name);
928 			purg_lnk(arcn);
929 			continue;
930 		}
931 		if ((fddest = file_creat(arcn)) < 0) {
932 			rdfile_close(arcn, &fdsrc);
933 			purg_lnk(arcn);
934 			continue;
935 		}
936 
937 		/*
938 		 * copy source file data to the destination file
939 		 */
940 		cp_file(arcn, fdsrc, fddest);
941 		file_close(arcn, fddest);
942 		rdfile_close(arcn, &fdsrc);
943 
944 		if (vflag && vfpart) {
945 			(void)putc('\n', listf);
946 			vfpart = 0;
947 		}
948 	}
949 
950 	/*
951 	 * restore directory modes and times as required; make sure all
952 	 * patterns were selected block off signals to avoid chance for
953 	 * multiple entry into the cleanup code.
954 	 */
955 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
956 	ar_close(0);
957 	sltab_process(0);
958 	proc_dir(0);
959 	ftree_chk();
960 }
961 
962 /*
963  * next_head()
964  *	try to find a valid header in the archive. Uses format specific
965  *	routines to extract the header and id the trailer. Trailers may be
966  *	located within a valid header or in an invalid header (the location
967  *	is format specific. The inhead field from the option table tells us
968  *	where to look for the trailer).
969  *	We keep reading (and resyncing) until we get enough contiguous data
970  *	to check for a header. If we cannot find one, we shift by a byte
971  *	add a new byte from the archive to the end of the buffer and try again.
972  *	If we get a read error, we throw out what we have (as we must have
973  *	contiguous data) and start over again.
974  *	ASSUMED: headers fit within a BLKMULT header.
975  * Return:
976  *	0 if we got a header, -1 if we are unable to ever find another one
977  *	(we reached the end of input, or we reached the limit on retries. see
978  *	the specs for rd_wrbuf() for more details)
979  */
980 
981 static int
982 next_head(ARCHD *arcn)
983 {
984 	int ret;
985 	char *hdend;
986 	int res;
987 	int shftsz;
988 	int hsz;
989 	int in_resync = 0;		/* set when we are in resync mode */
990 	int cnt = 0;			/* counter for trailer function */
991 	int first = 1;			/* on 1st read, EOF isn't premature. */
992 
993 	/*
994 	 * set up initial conditions, we want a whole frmt->hsz block as we
995 	 * have no data yet.
996 	 */
997 	res = hsz = frmt->hsz;
998 	hdend = hdbuf;
999 	shftsz = hsz - 1;
1000 	for (;;) {
1001 		/*
1002 		 * keep looping until we get a contiguous FULL buffer
1003 		 * (frmt->hsz is the proper size)
1004 		 */
1005 		for (;;) {
1006 			if ((ret = rd_wrbuf(hdend, res)) == res)
1007 				break;
1008 
1009 			/*
1010 			 * If we read 0 bytes (EOF) from an archive when we
1011 			 * expect to find a header, we have stepped upon
1012 			 * an archive without the customary block of zeroes
1013 			 * end marker.  It's just stupid to error out on
1014 			 * them, so exit gracefully.
1015 			 */
1016 			if (first && ret == 0)
1017 				return(-1);
1018 			first = 0;
1019 
1020 			/*
1021 			 * some kind of archive read problem, try to resync the
1022 			 * storage device, better give the user the bad news.
1023 			 */
1024 			if ((ret == 0) || (rd_sync() < 0)) {
1025 				paxwarn(1,"Premature end of file on archive read");
1026 				return(-1);
1027 			}
1028 			if (!in_resync) {
1029 				if (act == APPND) {
1030 					paxwarn(1,
1031 					  "Archive I/O error, cannot continue");
1032 					return(-1);
1033 				}
1034 				paxwarn(1,"Archive I/O error. Trying to recover.");
1035 				++in_resync;
1036 			}
1037 
1038 			/*
1039 			 * oh well, throw it all out and start over
1040 			 */
1041 			res = hsz;
1042 			hdend = hdbuf;
1043 		}
1044 
1045 		/*
1046 		 * ok we have a contiguous buffer of the right size. Call the
1047 		 * format read routine. If this was not a valid header and this
1048 		 * format stores trailers outside of the header, call the
1049 		 * format specific trailer routine to check for a trailer. We
1050 		 * have to watch out that we do not mis-identify file data or
1051 		 * block padding as a header or trailer. Format specific
1052 		 * trailer functions must NOT check for the trailer while we
1053 		 * are running in resync mode. Some trailer functions may tell
1054 		 * us that this block cannot contain a valid header either, so
1055 		 * we then throw out the entire block and start over.
1056 		 */
1057 		if ((*frmt->rd)(arcn, hdbuf) == 0)
1058 			break;
1059 
1060 		if (!frmt->inhead) {
1061 			/*
1062 			 * this format has trailers outside of valid headers
1063 			 */
1064 			if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){
1065 				/*
1066 				 * valid trailer found, drain input as required
1067 				 */
1068 				ar_drain();
1069 				return(-1);
1070 			}
1071 
1072 			if (ret == 1) {
1073 				/*
1074 				 * we are in resync and we were told to throw
1075 				 * the whole block out because none of the
1076 				 * bytes in this block can be used to form a
1077 				 * valid header
1078 				 */
1079 				res = hsz;
1080 				hdend = hdbuf;
1081 				continue;
1082 			}
1083 		}
1084 
1085 		/*
1086 		 * Brute force section.
1087 		 * not a valid header. We may be able to find a header yet. So
1088 		 * we shift over by one byte, and set up to read one byte at a
1089 		 * time from the archive and place it at the end of the buffer.
1090 		 * We will keep moving byte at a time until we find a header or
1091 		 * get a read error and have to start over.
1092 		 */
1093 		if (!in_resync) {
1094 			if (act == APPND) {
1095 				paxwarn(1,"Unable to append, archive header flaw");
1096 				return(-1);
1097 			}
1098 			paxwarn(1,"Invalid header, starting valid header search.");
1099 			++in_resync;
1100 		}
1101 		memmove(hdbuf, hdbuf+1, shftsz);
1102 		res = 1;
1103 		hdend = hdbuf + shftsz;
1104 	}
1105 
1106 	/*
	 * ok got a valid header, check for trailer if format encodes it in the
	 * header. NOTE: the parameters are different than trailer routines
1109 	 * which encode trailers outside of the header!
1110 	 */
1111 	if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) {
1112 		/*
1113 		 * valid trailer found, drain input as required
1114 		 */
1115 		ar_drain();
1116 		return(-1);
1117 	}
1118 
1119 	++flcnt;
1120 	return(0);
1121 }
1122 
1123 /*
1124  * get_arc()
1125  *	Figure out what format an archive is. Handles archive with flaws by
1126  *	brute force searches for a legal header in any supported format. The
1127  *	format id routines have to be careful to NOT mis-identify a format.
1128  *	ASSUMED: headers fit within a BLKMULT header.
1129  * Return:
1130  *	0 if archive found -1 otherwise
1131  */
1132 
1133 static int
1134 get_arc(void)
1135 {
1136 	int i;
1137 	int hdsz = 0;
1138 	int res;
1139 	int minhd = BLKMULT;
1140 	char *hdend;
1141 	int notice = 0;
1142 
1143 	/*
1144 	 * find the smallest header size in all archive formats and then set up
1145 	 * to read the archive.
1146 	 */
1147 	for (i = 0; ford[i] >= 0; ++i) {
1148 		if (fsub[ford[i]].name != NULL && fsub[ford[i]].hsz < minhd)
1149 			minhd = fsub[ford[i]].hsz;
1150 	}
1151 	if (rd_start() < 0)
1152 		return(-1);
1153 	res = BLKMULT;
1154 	hdsz = 0;
1155 	hdend = hdbuf;
1156 	for (;;) {
1157 		for (;;) {
1158 			/*
1159 			 * fill the buffer with at least the smallest header
1160 			 */
1161 			i = rd_wrbuf(hdend, res);
1162 			if (i > 0)
1163 				hdsz += i;
1164 			if (hdsz >= minhd)
1165 				break;
1166 
1167 			/*
1168 			 * if we cannot recover from a read error quit
1169 			 */
1170 			if ((i == 0) || (rd_sync() < 0))
1171 				goto out;
1172 
1173 			/*
1174 			 * when we get an error none of the data we already
1175 			 * have can be used to create a legal header (we just
1176 			 * got an error in the middle), so we throw it all out
1177 			 * and refill the buffer with fresh data.
1178 			 */
1179 			res = BLKMULT;
1180 			hdsz = 0;
1181 			hdend = hdbuf;
1182 			if (!notice) {
1183 				if (act == APPND)
1184 					return(-1);
1185 				paxwarn(1,"Cannot identify format. Searching...");
1186 				++notice;
1187 			}
1188 		}
1189 
1190 		/*
1191 		 * we have at least the size of the smallest header in any
1192 		 * archive format. Look to see if we have a match. The array
1193 		 * ford[] is used to specify the header id order to reduce the
1194 		 * chance of incorrectly id'ing a valid header (some formats
1195 		 * may be subsets of each other and the order would then be
1196 		 * important).
1197 		 */
1198 		for (i = 0; ford[i] >= 0; ++i) {
1199 			if (fsub[ford[i]].id == NULL ||
1200 			    (*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
1201 				continue;
1202 			frmt = &(fsub[ford[i]]);
1203 			/*
1204 			 * yuck, to avoid slow special case code in the extract
1205 			 * routines, just push this header back as if it was
1206 			 * not seen. We have left extra space at start of the
1207 			 * buffer for this purpose. This is a bit ugly, but
1208 			 * adding all the special case code is far worse.
1209 			 */
1210 			pback(hdbuf, hdsz);
1211 			return(0);
1212 		}
1213 
1214 		/*
1215 		 * We have a flawed archive, no match. we start searching, but
1216 		 * we never allow additions to flawed archives
1217 		 */
1218 		if (!notice) {
1219 			if (act == APPND)
1220 				return(-1);
1221 			paxwarn(1, "Cannot identify format. Searching...");
1222 			++notice;
1223 		}
1224 
1225 		/*
1226 		 * brute force search for a header that we can id.
1227 		 * we shift through byte at a time. this is slow, but we cannot
1228 		 * determine the nature of the flaw in the archive in a
1229 		 * portable manner
1230 		 */
1231 		if (--hdsz > 0) {
1232 			memmove(hdbuf, hdbuf+1, hdsz);
1233 			res = BLKMULT - hdsz;
1234 			hdend = hdbuf + hdsz;
1235 		} else {
1236 			res = BLKMULT;
1237 			hdend = hdbuf;
1238 			hdsz = 0;
1239 		}
1240 	}
1241 
1242     out:
1243 	/*
1244 	 * we cannot find a header, bow, apologize and quit
1245 	 */
1246 	paxwarn(1, "Sorry, unable to determine archive format.");
1247 	return(-1);
1248 }
1249