xref: /openbsd/bin/pax/ar_subs.c (revision fc61954a)
1 /*	$OpenBSD: ar_subs.c,v 1.48 2016/08/26 05:06:14 guenther Exp $	*/
2 /*	$NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1992 Keith Muller.
6  * Copyright (c) 1992, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * Keith Muller of the University of California, San Diego.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <errno.h>
40 #include <fcntl.h>
41 #include <signal.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <time.h>
45 #include <unistd.h>
46 
47 #include "pax.h"
48 #include "extern.h"
49 
50 static void wr_archive(ARCHD *, int is_app);	/* write phase used by archive() and append() */
51 static int get_arc(void);			/* work out the format of the archive being read */
52 static int next_head(ARCHD *);			/* find the next member header; 0 ok, -1 done */
53 extern sigset_t s_mask;				/* set passed to sigprocmask(SIG_BLOCK) before cleanup */
54 
55 /*
56  * Routines which control the overall operation modes of pax as specified by
57  * the user: list, append, read ...
58  */
59 
60 static char hdbuf[BLKMULT];		/* header window filled by get_arc() and next_head() */
61 u_long flcnt;				/* number of files processed (headers read, files written or copied) */
62 
63 /*
64  * list()
65  *	list the contents of an archive which match user supplied pattern(s)
66  *	(no pattern matches all).
67  */
68 
69 void
70 list(void)
71 {
72 	ARCHD *arcn;
73 	int res;
74 	ARCHD archd;
75 	time_t now;
76 
77 	arcn = &archd;
78 	/*
79 	 * figure out archive type; pass any format specific options to the
80 	 * archive option processing routine; call the format init routine. We
81 	 * also save current time for ls_list() so we do not make a system
82 	 * call for each file we need to print. If verbose (vflag) start up
83 	 * the name and group caches.
84 	 */
85 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
86 	    ((*frmt->st_rd)() < 0))
87 		return;
88 
89 	now = time(NULL);
90 
91 	/*
92 	 * step through the archive until the format says it is done
93 	 */
94 	while (next_head(arcn) == 0) {
95 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
96 			/*
97 			 * we need to read, to get the real filename
98 			 */
99 			off_t cnt;
100 			if (!rd_wrfile(arcn, arcn->type == PAX_GLF
101 			    ? -1 : -2, &cnt))
102 				(void)rd_skip(cnt + arcn->pad);
103 			continue;
104 		}
105 
106 		/*
107 		 * check for pattern, and user specified options match.
108 		 * When all patterns are matched we are done.
109 		 */
110 		if ((res = pat_match(arcn)) < 0)
111 			break;
112 
113 		if ((res == 0) && (sel_chk(arcn) == 0)) {
114 			/*
115 			 * pattern resulted in a selected file
116 			 */
117 			if (pat_sel(arcn) < 0)
118 				break;
119 
120 			/*
121 			 * modify the name as requested by the user if name
122 			 * survives modification, do a listing of the file
123 			 */
124 			if ((res = mod_name(arcn)) < 0)
125 				break;
126 			if (res == 0)
127 				ls_list(arcn, now, stdout);
128 		}
129 
130 		/*
131 		 * skip to next archive format header using values calculated
132 		 * by the format header read routine
133 		 */
134 		if (rd_skip(arcn->skip + arcn->pad) == 1)
135 			break;
136 	}
137 
138 	/*
139 	 * all done, let format have a chance to cleanup, and make sure that
140 	 * the patterns supplied by the user were all matched
141 	 */
142 	(void)(*frmt->end_rd)();
143 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
144 	ar_close(0);
145 	pat_chk();
146 }
147 
148 static int
149 cmp_file_times(int mtime_flag, int ctime_flag, ARCHD *arcn, struct stat *sbp)
150 {
151 	struct stat sb;
152 
153 	if (sbp == NULL) {
154 		if (lstat(arcn->name, &sb) != 0)
155 			return (0);
156 		sbp = &sb;
157 	}
158 
159 	if (ctime_flag && mtime_flag)
160 		return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=) &&
161 		        timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
162 	else if (ctime_flag)
163 		return (timespeccmp(&arcn->sb.st_ctim, &sbp->st_ctim, <=));
164 	else
165 		return (timespeccmp(&arcn->sb.st_mtim, &sbp->st_mtim, <=));
166 }
167 
168 /*
169  * extract()
170  *	extract the member(s) of an archive as specified by user supplied
171  *	pattern(s) (no patterns extracts all members)
172  */
173 
174 void
175 extract(void)
176 {
177 	ARCHD *arcn;
178 	int res;
179 	off_t cnt;
180 	ARCHD archd;
181 	int fd;
182 	time_t now;
183 
184 	sltab_start();
185 
186 	arcn = &archd;
187 	/*
188 	 * figure out archive type; pass any format specific options to the
189 	 * archive option processing routine; call the format init routine;
190 	 * start up the directory modification time and access mode database
191 	 */
192 	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
193 	    ((*frmt->st_rd)() < 0) || (dir_start() < 0))
194 		return;
195 
196 	/*
197 	 * When we are doing interactive rename, we store the mapping of names
198 	 * so we can fix up hard links files later in the archive.
199 	 */
200 	if (iflag && (name_start() < 0))
201 		return;
202 
203 	now = time(NULL);
204 
205 	/*
206 	 * step through each entry on the archive until the format read routine
207 	 * says it is done
208 	 */
209 	while (next_head(arcn) == 0) {
210 		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
211 			/*
212 			 * we need to read, to get the real filename
213 			 */
214 			if (!rd_wrfile(arcn, arcn->type == PAX_GLF
215 			    ? -1 : -2, &cnt))
216 				(void)rd_skip(cnt + arcn->pad);
217 			continue;
218 		}
219 
220 		/*
221 		 * check for pattern, and user specified options match. When
222 		 * all the patterns are matched we are done
223 		 */
224 		if ((res = pat_match(arcn)) < 0)
225 			break;
226 
227 		if ((res > 0) || (sel_chk(arcn) != 0)) {
228 			/*
229 			 * file is not selected. skip past any file data and
230 			 * padding and go back for the next archive member
231 			 */
232 			(void)rd_skip(arcn->skip + arcn->pad);
233 			continue;
234 		}
235 
236 		/*
237 		 * with -u or -D only extract when the archive member is newer
238 		 * than the file with the same name in the file system (no
239 		 * test of being the same type is required).
240 		 * NOTE: this test is done BEFORE name modifications as
241 		 * specified by pax. this operation can be confusing to the
242 		 * user who might expect the test to be done on an existing
243 		 * file AFTER the name mod. In honesty the pax spec is probably
244 		 * flawed in this respect.
245 		 */
246 		if ((uflag || Dflag) &&
247 		    cmp_file_times(uflag, Dflag, arcn, NULL)) {
248 			(void)rd_skip(arcn->skip + arcn->pad);
249 			continue;
250 		}
251 
252 		/*
253 		 * this archive member is now been selected. modify the name.
254 		 */
255 		if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0))
256 			break;
257 		if (res > 0) {
258 			/*
259 			 * a bad name mod, skip and purge name from link table
260 			 */
261 			purg_lnk(arcn);
262 			(void)rd_skip(arcn->skip + arcn->pad);
263 			continue;
264 		}
265 
266 		/*
267 		 * Non standard -Y and -Z flag. When the existing file is
268 		 * same age or newer skip
269 		 */
270 		if ((Yflag || Zflag) &&
271 		    cmp_file_times(Yflag, Zflag, arcn, NULL)) {
272 			(void)rd_skip(arcn->skip + arcn->pad);
273 			continue;
274 		}
275 
276 		if (vflag) {
277 			if (vflag > 1)
278 				ls_list(arcn, now, listf);
279 			else {
280 				(void)safe_print(arcn->name, listf);
281 				vfpart = 1;
282 			}
283 		}
284 
285 		/*
286 		 * if required, chdir around.
287 		 */
288 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
289 			if (chdir(arcn->pat->chdname) != 0)
290 				syswarn(1, errno, "Cannot chdir to %s",
291 				    arcn->pat->chdname);
292 
293 		/*
294 		 * all ok, extract this member based on type
295 		 */
296 		if (!PAX_IS_REG(arcn->type)) {
297 			/*
298 			 * process archive members that are not regular files.
299 			 * throw out padding and any data that might follow the
300 			 * header (as determined by the format).
301 			 */
302 			if (PAX_IS_HARDLINK(arcn->type))
303 				res = lnk_creat(arcn);
304 			else
305 				res = node_creat(arcn);
306 
307 			(void)rd_skip(arcn->skip + arcn->pad);
308 			if (res < 0)
309 				purg_lnk(arcn);
310 
311 			if (vflag && vfpart) {
312 				(void)putc('\n', listf);
313 				vfpart = 0;
314 			}
315 			goto popd;
316 		}
317 		/*
318 		 * we have a file with data here. If we can not create it, skip
319 		 * over the data and purge the name from hard link table
320 		 */
321 		if ((fd = file_creat(arcn)) < 0) {
322 			(void)rd_skip(arcn->skip + arcn->pad);
323 			purg_lnk(arcn);
324 			goto popd;
325 		}
326 		/*
327 		 * extract the file from the archive and skip over padding and
328 		 * any unprocessed data
329 		 */
330 		res = rd_wrfile(arcn, fd, &cnt);
331 		file_close(arcn, fd);
332 		if (vflag && vfpart) {
333 			(void)putc('\n', listf);
334 			vfpart = 0;
335 		}
336 		if (!res)
337 			(void)rd_skip(cnt + arcn->pad);
338 
339 popd:
340 		/*
341 		 * if required, chdir around.
342 		 */
343 		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
344 			if (fchdir(cwdfd) != 0)
345 				syswarn(1, errno,
346 				    "Can't fchdir to starting directory");
347 	}
348 
349 	/*
350 	 * all done, restore directory modes and times as required; make sure
351 	 * all patterns supplied by the user were matched; block off signals
352 	 * to avoid chance for multiple entry into the cleanup code.
353 	 */
354 	(void)(*frmt->end_rd)();
355 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
356 	ar_close(0);
357 	sltab_process(0);
358 	proc_dir(0);
359 	pat_chk();
360 }
361 
362 /*
363  * wr_archive()
364  *	Write an archive. used in both creating a new archive and appends on
365  *	previously written archive.
366  */
367 
368 static void
369 wr_archive(ARCHD *arcn, int is_app)
370 {
371 	int res;
372 	int hlk;
373 	int wr_one;
374 	off_t cnt;
375 	int (*wrf)(ARCHD *);
376 	int fd = -1;
377 	time_t now;
378 
379 	/*
380 	 * if this format supports hard link storage, start up the database
381 	 * that detects them.
382 	 */
383 	if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0))
384 		return;
385 
386 	/*
387 	 * if this is not append, and there are no files, we do not write a
388 	 * trailer
389 	 */
390 	wr_one = is_app;
391 
392 	/*
393 	 * start up the file traversal code and format specific write
394 	 */
395 	if (ftree_start() < 0) {
396 		if (is_app)
397 			goto trailer;
398 		return;
399 	} else if (((*frmt->st_wr)() < 0))
400 		return;
401 
402 	wrf = frmt->wr;
403 
404 	/*
405 	 * When we are doing interactive rename, we store the mapping of names
406 	 * so we can fix up hard links files later in the archive.
407 	 */
408 	if (iflag && (name_start() < 0))
409 		return;
410 
411 	now = time(NULL);
412 
413 	/*
414 	 * while there are files to archive, process them one at at time
415 	 */
416 	while (next_file(arcn) == 0) {
417 		/*
418 		 * check if this file meets user specified options match.
419 		 */
420 		if (sel_chk(arcn) != 0)
421 			continue;
422 		fd = -1;
423 		if (uflag) {
424 			/*
425 			 * only archive if this file is newer than a file with
426 			 * the same name that is already stored on the archive
427 			 */
428 			if ((res = chk_ftime(arcn)) < 0)
429 				break;
430 			if (res > 0) {
431 				ftree_skipped_newer(arcn);
432 				continue;
433 			}
434 		}
435 
436 		/*
437 		 * this file is considered selected now. see if this is a hard
438 		 * link to a file already stored
439 		 */
440 		ftree_sel(arcn);
441 		if (hlk && (chk_lnk(arcn) < 0))
442 			break;
443 
444 		if (PAX_IS_REG(arcn->type) || (arcn->type == PAX_HRG)) {
445 			/*
446 			 * we will have to read this file. by opening it now we
447 			 * can avoid writing a header to the archive for a file
448 			 * we were later unable to read (we also purge it from
449 			 * the link table).
450 			 */
451 			if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) {
452 				syswarn(1,errno, "Unable to open %s to read",
453 					arcn->org_name);
454 				purg_lnk(arcn);
455 				continue;
456 			}
457 		}
458 
459 		/*
460 		 * Now modify the name as requested by the user
461 		 */
462 		if ((res = mod_name(arcn)) < 0) {
463 			/*
464 			 * name modification says to skip this file, close the
465 			 * file and purge link table entry
466 			 */
467 			rdfile_close(arcn, &fd);
468 			purg_lnk(arcn);
469 			break;
470 		}
471 
472 		if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) {
473 			/*
474 			 * unable to obtain the crc we need, close the file,
475 			 * purge link table entry
476 			 */
477 			rdfile_close(arcn, &fd);
478 			purg_lnk(arcn);
479 			continue;
480 		}
481 
482 		if (vflag) {
483 			if (vflag > 1)
484 				ls_list(arcn, now, listf);
485 			else {
486 				(void)safe_print(arcn->name, listf);
487 				vfpart = 1;
488 			}
489 		}
490 		++flcnt;
491 
492 		/*
493 		 * looks safe to store the file, have the format specific
494 		 * routine write routine store the file header on the archive
495 		 */
496 		if ((res = (*wrf)(arcn)) < 0) {
497 			rdfile_close(arcn, &fd);
498 			break;
499 		}
500 		wr_one = 1;
501 		if (res > 0) {
502 			/*
503 			 * format write says no file data needs to be stored
504 			 * so we are done messing with this file
505 			 */
506 			if (vflag && vfpart) {
507 				(void)putc('\n', listf);
508 				vfpart = 0;
509 			}
510 			rdfile_close(arcn, &fd);
511 			continue;
512 		}
513 
514 		/*
515 		 * Add file data to the archive, quit on write error. if we
516 		 * cannot write the entire file contents to the archive we
517 		 * must pad the archive to replace the missing file data
518 		 * (otherwise during an extract the file header for the file
519 		 * which FOLLOWS this one will not be where we expect it to
520 		 * be).
521 		 */
522 		res = wr_rdfile(arcn, fd, &cnt);
523 		rdfile_close(arcn, &fd);
524 		if (vflag && vfpart) {
525 			(void)putc('\n', listf);
526 			vfpart = 0;
527 		}
528 		if (res < 0)
529 			break;
530 
531 		/*
532 		 * pad as required, cnt is number of bytes not written
533 		 */
534 		if (((cnt > 0) && (wr_skip(cnt) < 0)) ||
535 		    ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0)))
536 			break;
537 	}
538 
539 trailer:
540 	/*
541 	 * tell format to write trailer; pad to block boundary; reset directory
542 	 * mode/access times, and check if all patterns supplied by the user
543 	 * were matched. block off signals to avoid chance for multiple entry
544 	 * into the cleanup code
545 	 */
546 	if (wr_one) {
547 		(*frmt->end_wr)();
548 		wr_fin();
549 	}
550 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
551 	ar_close(0);
552 	if (tflag)
553 		proc_dir(0);
554 	ftree_chk();
555 }
556 
557 /*
558  * append()
559  *	Add file to previously written archive. Archive format specified by the
560  *	user must agree with archive. The archive is read first to collect
561  *	modification times (if -u) and locate the archive trailer. The archive
562  *	is positioned in front of the record with the trailer and wr_archive()
563  *	is called to add the new members.
564  *	PAX IMPLEMENTATION DETAIL NOTE:
565  *	-u is implemented by adding the new members to the end of the archive.
566  *	Care is taken so that these do not end up as links to the older
567  *	version of the same file already stored in the archive. It is expected
568  *	when extraction occurs these newer versions will over-write the older
569  *	ones stored "earlier" in the archive (this may be a bad assumption as
570  *	it depends on the implementation of the program doing the extraction).
571  *	It is really difficult to splice in members without either re-writing
572  *	the entire archive (from the point were the old version was), or having
573  *	assistance of the format specification in terms of a special update
574  *	header that invalidates a previous archive record. The posix spec left
575  *	the method used to implement -u unspecified. This pax is able to
576  *	over write existing files that it creates.
577  */
578 
579 void
580 append(void)
581 {
582 	ARCHD *arcn;
583 	int res;
584 	ARCHD archd;
585 	FSUB *orgfrmt;
586 	int udev;
587 	off_t tlen;
588 
589 	arcn = &archd;
590 	orgfrmt = frmt;
591 
592 	/*
593 	 * Do not allow an append operation if the actual archive is of a
594 	 * different format than the user specified format.
595 	 */
596 	if (get_arc() < 0)
597 		return;
598 	if ((orgfrmt != NULL) && (orgfrmt != frmt)) {
599 		paxwarn(1, "Cannot mix current archive format %s with %s",
600 		    frmt->name, orgfrmt->name);
601 		return;
602 	}
603 
604 	/*
605 	 * pass the format any options and start up format
606 	 */
607 	if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0))
608 		return;
609 
610 	/*
611 	 * if we only are adding members that are newer, we need to save the
612 	 * mod times for all files we see.
613 	 */
614 	if (uflag && (ftime_start() < 0))
615 		return;
616 
617 	/*
618 	 * some archive formats encode hard links by recording the device and
619 	 * file serial number (inode) but copy the file anyway (multiple times)
620 	 * to the archive. When we append, we run the risk that newly added
621 	 * files may have the same device and inode numbers as those recorded
622 	 * on the archive but during a previous run. If this happens, when the
623 	 * archive is extracted we get INCORRECT hard links. We avoid this by
624 	 * remapping the device numbers so that newly added files will never
625 	 * use the same device number as one found on the archive. remapping
626 	 * allows new members to safely have links among themselves. remapping
627 	 * also avoids problems with file inode (serial number) truncations
628 	 * when the inode number is larger than storage space in the archive
629 	 * header. See the remap routines for more details.
630 	 */
631 	if ((udev = frmt->udev) && (dev_start() < 0))
632 		return;
633 
634 	/*
635 	 * reading the archive may take a long time. If verbose tell the user
636 	 */
637 	if (vflag) {
638 		(void)fprintf(listf,
639 			"%s: Reading archive to position at the end...", argv0);
640 		vfpart = 1;
641 	}
642 
643 	/*
644 	 * step through the archive until the format says it is done
645 	 */
646 	while (next_head(arcn) == 0) {
647 		/*
648 		 * check if this file meets user specified options.
649 		 */
650 		if (sel_chk(arcn) != 0) {
651 			if (rd_skip(arcn->skip + arcn->pad) == 1)
652 				break;
653 			continue;
654 		}
655 
656 		if (uflag) {
657 			/*
658 			 * see if this is the newest version of this file has
659 			 * already been seen, if so skip.
660 			 */
661 			if ((res = chk_ftime(arcn)) < 0)
662 				break;
663 			if (res > 0) {
664 				if (rd_skip(arcn->skip + arcn->pad) == 1)
665 					break;
666 				continue;
667 			}
668 		}
669 
670 		/*
671 		 * Store this device number. Device numbers seen during the
672 		 * read phase of append will cause newly appended files with a
673 		 * device number seen in the old part of the archive to be
674 		 * remapped to an unused device number.
675 		 */
676 		if ((udev && (add_dev(arcn) < 0)) ||
677 		    (rd_skip(arcn->skip + arcn->pad) == 1))
678 			break;
679 	}
680 
681 	/*
682 	 * done, finish up read and get the number of bytes to back up so we
683 	 * can add new members. The format might have used the hard link table,
684 	 * purge it.
685 	 */
686 	tlen = (*frmt->end_rd)();
687 	lnk_end();
688 
689 	/*
690 	 * try to position for write, if this fails quit. if any error occurs,
691 	 * we will refuse to write
692 	 */
693 	if (appnd_start(tlen) < 0)
694 		return;
695 
696 	/*
697 	 * tell the user we are done reading.
698 	 */
699 	if (vflag && vfpart) {
700 		(void)fputs("done.\n", listf);
701 		vfpart = 0;
702 	}
703 
704 	/*
705 	 * go to the writing phase to add the new members
706 	 */
707 	wr_archive(arcn, 1);
708 }
709 
710 /*
711  * archive()
712  *	write a new archive
713  */
714 
715 void
716 archive(void)
717 {
718 	ARCHD archd;
719 
720 	/*
721 	 * if we only are adding members that are newer, we need to save the
722 	 * mod times for all files; set up for writing; pass the format any
723 	 * options write the archive
724 	 */
725 	if ((uflag && (ftime_start() < 0)) || (wr_start() < 0))
726 		return;
727 	if ((*frmt->options)() < 0)
728 		return;
729 
730 	wr_archive(&archd, 0);
731 }
732 
733 /*
734  * copy()
735  *	copy files from one part of the file system to another. this does not
736  *	use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an
737  *	archive was written and then extracted in the destination directory
738  *	(except the files are forced to be under the destination directory).
739  */
740 
741 void
742 copy(void)
743 {
744 	ARCHD *arcn;
745 	int res;
746 	int fddest;
747 	char *dest_pt;
748 	size_t dlen;
749 	size_t drem;
750 	int fdsrc = -1;
751 	struct stat sb;
752 	ARCHD archd;
753 	char dirbuf[PAXPATHLEN+1];
754 
755 	sltab_start();
756 
757 	arcn = &archd;
758 	/*
759 	 * set up the destination dir path and make sure it is a directory. We
760 	 * make sure we have a trailing / on the destination
761 	 */
762 	dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf));
763 	if (dlen >= sizeof(dirbuf) ||
764 	    (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) {
765 		paxwarn(1, "directory name is too long %s", dirptr);
766 		return;
767 	}
768 	dest_pt = dirbuf + dlen;
769 	if (*(dest_pt-1) != '/') {
770 		*dest_pt++ = '/';
771 		*dest_pt = '\0';
772 		++dlen;
773 	}
774 	drem = PAXPATHLEN - dlen;
775 
776 	if (stat(dirptr, &sb) < 0) {
777 		syswarn(1, errno, "Cannot access destination directory %s",
778 			dirptr);
779 		return;
780 	}
781 	if (!S_ISDIR(sb.st_mode)) {
782 		paxwarn(1, "Destination is not a directory %s", dirptr);
783 		return;
784 	}
785 
786 	/*
787 	 * start up the hard link table; file traversal routines and the
788 	 * modification time and access mode database
789 	 */
790 	if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0))
791 		return;
792 
793 	/*
794 	 * When we are doing interactive rename, we store the mapping of names
795 	 * so we can fix up hard links files later in the archive.
796 	 */
797 	if (iflag && (name_start() < 0))
798 		return;
799 
800 	/*
801 	 * set up to cp file trees
802 	 */
803 	cp_start();
804 
805 	/*
806 	 * while there are files to archive, process them
807 	 */
808 	while (next_file(arcn) == 0) {
809 		fdsrc = -1;
810 
811 		/*
812 		 * check if this file meets user specified options
813 		 */
814 		if (sel_chk(arcn) != 0)
815 			continue;
816 
817 		/*
818 		 * if there is already a file in the destination directory with
819 		 * the same name and it is newer, skip the one stored on the
820 		 * archive.
821 		 * NOTE: this test is done BEFORE name modifications as
822 		 * specified by pax. this can be confusing to the user who
823 		 * might expect the test to be done on an existing file AFTER
824 		 * the name mod. In honesty the pax spec is probably flawed in
825 		 * this respect
826 		 */
827 		if (uflag || Dflag) {
828 			/*
829 			 * create the destination name
830 			 */
831 			if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'),
832 			    drem + 1) > drem) {
833 				paxwarn(1, "Destination pathname too long %s",
834 					arcn->name);
835 				continue;
836 			}
837 
838 			/*
839 			 * if existing file is same age or newer skip
840 			 */
841 			res = lstat(dirbuf, &sb);
842 			*dest_pt = '\0';
843 
844 			if (res == 0) {
845 				ftree_skipped_newer(arcn);
846 				if (cmp_file_times(uflag, Dflag, arcn, &sb))
847 					continue;
848 			}
849 		}
850 
851 		/*
852 		 * this file is considered selected. See if this is a hard link
853 		 * to a previous file; modify the name as requested by the
854 		 * user; set the final destination.
855 		 */
856 		ftree_sel(arcn);
857 		if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0))
858 			break;
859 		if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) {
860 			/*
861 			 * skip file, purge from link table
862 			 */
863 			purg_lnk(arcn);
864 			continue;
865 		}
866 
867 		/*
868 		 * Non standard -Y and -Z flag. When the existing file is
869 		 * same age or newer skip
870 		 */
871 		if ((Yflag || Zflag) &&
872 		    cmp_file_times(Yflag, Zflag, arcn, NULL))
873 			continue;
874 
875 		if (vflag) {
876 			(void)safe_print(arcn->name, listf);
877 			vfpart = 1;
878 		}
879 		++flcnt;
880 
881 		/*
882 		 * try to create a hard link to the src file if requested
883 		 * but make sure we are not trying to overwrite ourselves.
884 		 */
885 		if (lflag)
886 			res = cross_lnk(arcn);
887 		else
888 			res = chk_same(arcn);
889 		if (res <= 0) {
890 			if (vflag && vfpart) {
891 				(void)putc('\n', listf);
892 				vfpart = 0;
893 			}
894 			continue;
895 		}
896 
897 		/*
898 		 * have to create a new file
899 		 */
900 		if (!PAX_IS_REG(arcn->type)) {
901 			/*
902 			 * create a link or special file
903 			 */
904 			if (PAX_IS_HARDLINK(arcn->type))
905 				res = lnk_creat(arcn);
906 			else
907 				res = node_creat(arcn);
908 			if (res < 0)
909 				purg_lnk(arcn);
910 			if (vflag && vfpart) {
911 				(void)putc('\n', listf);
912 				vfpart = 0;
913 			}
914 			continue;
915 		}
916 
917 		/*
918 		 * have to copy a regular file to the destination directory.
919 		 * first open source file and then create the destination file
920 		 */
921 		if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) {
922 			syswarn(1, errno, "Unable to open %s to read",
923 			    arcn->org_name);
924 			purg_lnk(arcn);
925 			continue;
926 		}
927 		if ((fddest = file_creat(arcn)) < 0) {
928 			rdfile_close(arcn, &fdsrc);
929 			purg_lnk(arcn);
930 			continue;
931 		}
932 
933 		/*
934 		 * copy source file data to the destination file
935 		 */
936 		cp_file(arcn, fdsrc, fddest);
937 		file_close(arcn, fddest);
938 		rdfile_close(arcn, &fdsrc);
939 
940 		if (vflag && vfpart) {
941 			(void)putc('\n', listf);
942 			vfpart = 0;
943 		}
944 	}
945 
946 	/*
947 	 * restore directory modes and times as required; make sure all
948 	 * patterns were selected block off signals to avoid chance for
949 	 * multiple entry into the cleanup code.
950 	 */
951 	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
952 	ar_close(0);
953 	sltab_process(0);
954 	proc_dir(0);
955 	ftree_chk();
956 }
957 
958 /*
959  * next_head()
960  *	try to find a valid header in the archive. Uses format specific
961  *	routines to extract the header and id the trailer. Trailers may be
962  *	located within a valid header or in an invalid header (the location
963  *	is format specific. The inhead field from the option table tells us
964  *	where to look for the trailer).
965  *	We keep reading (and resyncing) until we get enough contiguous data
966  *	to check for a header. If we cannot find one, we shift by a byte
967  *	add a new byte from the archive to the end of the buffer and try again.
968  *	If we get a read error, we throw out what we have (as we must have
969  *	contiguous data) and start over again.
970  *	ASSUMED: headers fit within a BLKMULT header.
971  * Return:
972  *	0 if we got a header, -1 if we are unable to ever find another one
973  *	(we reached the end of input, or we reached the limit on retries. see
974  *	the specs for rd_wrbuf() for more details)
975  */
976 
977 static int
978 next_head(ARCHD *arcn)
979 {
980 	int ret;
981 	char *hdend;
982 	int res;
983 	int shftsz;
984 	int hsz;
985 	int in_resync = 0;		/* set when we are in resync mode */
986 	int cnt = 0;			/* counter for trailer function */
987 	int first = 1;			/* on 1st read, EOF isn't premature. */
988 
989 	/*
990 	 * set up initial conditions, we want a whole frmt->hsz block as we
991 	 * have no data yet.
992 	 */
993 	res = hsz = frmt->hsz;
994 	hdend = hdbuf;
995 	shftsz = hsz - 1;
996 	for (;;) {
997 		/*
998 		 * keep looping until we get a contiguous FULL buffer
999 		 * (frmt->hsz is the proper size)
1000 		 */
1001 		for (;;) {
1002 			if ((ret = rd_wrbuf(hdend, res)) == res)
1003 				break;
1004 
1005 			/*
1006 			 * If we read 0 bytes (EOF) from an archive when we
1007 			 * expect to find a header, we have stepped upon
1008 			 * an archive without the customary block of zeroes
1009 			 * end marker.  It's just stupid to error out on
1010 			 * them, so exit gracefully.
1011 			 */
1012 			if (first && ret == 0)
1013 				return(-1);
1014 			first = 0;
1015 
1016 			/*
1017 			 * some kind of archive read problem, try to resync the
1018 			 * storage device, better give the user the bad news.
1019 			 */
1020 			if ((ret == 0) || (rd_sync() < 0)) {
1021 				paxwarn(1,"Premature end of file on archive read");
1022 				return(-1);
1023 			}
1024 			if (!in_resync) {
1025 				if (act == APPND) {
1026 					paxwarn(1,
1027 					  "Archive I/O error, cannot continue");
1028 					return(-1);
1029 				}
1030 				paxwarn(1,"Archive I/O error. Trying to recover.");
1031 				++in_resync;
1032 			}
1033 
1034 			/*
1035 			 * oh well, throw it all out and start over
1036 			 */
1037 			res = hsz;
1038 			hdend = hdbuf;
1039 		}
1040 
1041 		/*
1042 		 * ok we have a contiguous buffer of the right size. Call the
1043 		 * format read routine. If this was not a valid header and this
1044 		 * format stores trailers outside of the header, call the
1045 		 * format specific trailer routine to check for a trailer. We
1046 		 * have to watch out that we do not mis-identify file data or
1047 		 * block padding as a header or trailer. Format specific
1048 		 * trailer functions must NOT check for the trailer while we
1049 		 * are running in resync mode. Some trailer functions may tell
1050 		 * us that this block cannot contain a valid header either, so
1051 		 * we then throw out the entire block and start over.
1052 		 */
1053 		if ((*frmt->rd)(arcn, hdbuf) == 0)
1054 			break;
1055 
1056 		if (!frmt->inhead) {
1057 			/*
1058 			 * this format has trailers outside of valid headers
1059 			 */
1060 			if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){
1061 				/*
1062 				 * valid trailer found, drain input as required
1063 				 */
1064 				ar_drain();
1065 				return(-1);
1066 			}
1067 
1068 			if (ret == 1) {
1069 				/*
1070 				 * we are in resync and we were told to throw
1071 				 * the whole block out because none of the
1072 				 * bytes in this block can be used to form a
1073 				 * valid header
1074 				 */
1075 				res = hsz;
1076 				hdend = hdbuf;
1077 				continue;
1078 			}
1079 		}
1080 
1081 		/*
1082 		 * Brute force section.
1083 		 * not a valid header. We may be able to find a header yet. So
1084 		 * we shift over by one byte, and set up to read one byte at a
1085 		 * time from the archive and place it at the end of the buffer.
1086 		 * We will keep moving byte at a time until we find a header or
1087 		 * get a read error and have to start over.
1088 		 */
1089 		if (!in_resync) {
1090 			if (act == APPND) {
1091 				paxwarn(1,"Unable to append, archive header flaw");
1092 				return(-1);
1093 			}
1094 			paxwarn(1,"Invalid header, starting valid header search.");
1095 			++in_resync;
1096 		}
1097 		memmove(hdbuf, hdbuf+1, shftsz);
1098 		res = 1;
1099 		hdend = hdbuf + shftsz;
1100 	}
1101 
1102 	/*
1103 	 * ok got a valid header, check for trailer if format encodes it in the
1104 	 * header. NOTE: the parameters are different than trailer routines
1105 	 * which encode trailers outside of the header!
1106 	 */
1107 	if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) {
1108 		/*
1109 		 * valid trailer found, drain input as required
1110 		 */
1111 		ar_drain();
1112 		return(-1);
1113 	}
1114 
1115 	++flcnt;
1116 	return(0);
1117 }
1118 
1119 /*
1120  * get_arc()
1121  *	Figure out what format an archive is. Handles archive with flaws by
1122  *	brute force searches for a legal header in any supported format. The
1123  *	format id routines have to be careful to NOT mis-identify a format.
1124  *	ASSUMED: headers fit within a BLKMULT header.
1125  * Return:
1126  *	0 if archive found -1 otherwise
1127  */
1128 
1129 static int
1130 get_arc(void)
1131 {
1132 	int i;
1133 	int hdsz = 0;
1134 	int res;
1135 	int minhd = BLKMULT;
1136 	char *hdend;
1137 	int notice = 0;
1138 
1139 	/*
1140 	 * find the smallest header size in all archive formats and then set up
1141 	 * to read the archive.
1142 	 */
1143 	for (i = 0; ford[i] >= 0; ++i) {
1144 		if (fsub[ford[i]].name != NULL && fsub[ford[i]].hsz < minhd)
1145 			minhd = fsub[ford[i]].hsz;
1146 	}
1147 	if (rd_start() < 0)
1148 		return(-1);
1149 	res = BLKMULT;
1150 	hdsz = 0;
1151 	hdend = hdbuf;
1152 	for (;;) {
1153 		for (;;) {
1154 			/*
1155 			 * fill the buffer with at least the smallest header
1156 			 */
1157 			i = rd_wrbuf(hdend, res);
1158 			if (i > 0)
1159 				hdsz += i;
1160 			if (hdsz >= minhd)
1161 				break;
1162 
1163 			/*
1164 			 * if we cannot recover from a read error quit
1165 			 */
1166 			if ((i == 0) || (rd_sync() < 0))
1167 				goto out;
1168 
1169 			/*
1170 			 * when we get an error none of the data we already
1171 			 * have can be used to create a legal header (we just
1172 			 * got an error in the middle), so we throw it all out
1173 			 * and refill the buffer with fresh data.
1174 			 */
1175 			res = BLKMULT;
1176 			hdsz = 0;
1177 			hdend = hdbuf;
1178 			if (!notice) {
1179 				if (act == APPND)
1180 					return(-1);
1181 				paxwarn(1,"Cannot identify format. Searching...");
1182 				++notice;
1183 			}
1184 		}
1185 
1186 		/*
1187 		 * we have at least the size of the smallest header in any
1188 		 * archive format. Look to see if we have a match. The array
1189 		 * ford[] is used to specify the header id order to reduce the
1190 		 * chance of incorrectly id'ing a valid header (some formats
1191 		 * may be subsets of each other and the order would then be
1192 		 * important).
1193 		 */
1194 		for (i = 0; ford[i] >= 0; ++i) {
1195 			if (fsub[ford[i]].id == NULL ||
1196 			    (*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
1197 				continue;
1198 			frmt = &(fsub[ford[i]]);
1199 			/*
1200 			 * yuck, to avoid slow special case code in the extract
1201 			 * routines, just push this header back as if it was
1202 			 * not seen. We have left extra space at start of the
1203 			 * buffer for this purpose. This is a bit ugly, but
1204 			 * adding all the special case code is far worse.
1205 			 */
1206 			pback(hdbuf, hdsz);
1207 			return(0);
1208 		}
1209 
1210 		/*
1211 		 * We have a flawed archive, no match. we start searching, but
1212 		 * we never allow additions to flawed archives
1213 		 */
1214 		if (!notice) {
1215 			if (act == APPND)
1216 				return(-1);
1217 			paxwarn(1, "Cannot identify format. Searching...");
1218 			++notice;
1219 		}
1220 
1221 		/*
1222 		 * brute force search for a header that we can id.
1223 		 * we shift through byte at a time. this is slow, but we cannot
1224 		 * determine the nature of the flaw in the archive in a
1225 		 * portable manner
1226 		 */
1227 		if (--hdsz > 0) {
1228 			memmove(hdbuf, hdbuf+1, hdsz);
1229 			res = BLKMULT - hdsz;
1230 			hdend = hdbuf + hdsz;
1231 		} else {
1232 			res = BLKMULT;
1233 			hdend = hdbuf;
1234 			hdsz = 0;
1235 		}
1236 	}
1237 
1238     out:
1239 	/*
1240 	 * we cannot find a header, bow, apologize and quit
1241 	 */
1242 	paxwarn(1, "Sorry, unable to determine archive format.");
1243 	return(-1);
1244 }
1245