1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $");
28 
29 #ifdef HAVE_SYS_STAT_H
30 #include <sys/stat.h>
31 #endif
32 #ifdef HAVE_ERRNO_H
33 #include <errno.h>
34 #endif
35 #include <stdio.h>
36 #ifdef HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
42 
43 #include "archive.h"
44 #include "archive_entry.h"
45 
46 /*
47  * This is mostly a pretty straightforward hash table implementation.
48  * The only interesting bit is the different strategies used to
49  * match up links.  These strategies match those used by various
50  * archiving formats:
51  *   tar - content stored with first link, remainder refer back to it.
52  *       This requires us to match each subsequent link up with the
53  *       first appearance.
54  *   cpio - Old cpio just stored body with each link, match-ups were
55  *       implicit.  This is trivial.
56  *   new cpio - New cpio only stores body with last link, match-ups
57  *       are implicit.  This is actually quite tricky; see the notes
58  *       below.
59  */
60 
/*
 * Users pass us a format code, we translate that into a strategy here.
 * The chosen value is stored in the resolver's `strategy` field and
 * selects the branch taken by archive_entry_linkify().
 */
/* Later links lose their size and get a hardlink to the first-seen path. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
/* Later links get a hardlink to the first-seen path; size is untouched. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
/* Entries pass through archive_entry_linkify() unmodified. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
/* Entries are held back and swapped so that the last link is emitted last. */
#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3

/*
 * Initial size of link cache (number of hash buckets).  Bucket selection
 * masks the hash with (size - 1), so this must be a positive power of
 * two; archive_entry_linkresolver_new() returns NULL otherwise.
 */
#define	links_cache_initial_size 1024
69 
/*
 * One node of the links cache: a file (identified by dev/ino) for which
 * more hardlinks are still expected.
 */
struct links_entry {
	/* Doubly-linked chain of nodes that share a hash bucket. */
	struct links_entry	*next;
	struct links_entry	*previous;
	/* Clone of the first entry seen for this file (see insert_entry). */
	struct archive_entry	*canonical;
	/* Entry being held back; only set by the new-cpio strategy. */
	struct archive_entry	*entry;
	/* dev ^ ino of the file, cached for lookups and rehashing. */
	size_t			 hash;
	unsigned int		 links; /* # links not yet seen */
};
78 
/*
 * Hardlink resolver state: a chained hash table of links_entry nodes
 * plus the strategy used to rewrite entries.
 */
struct archive_entry_linkresolver {
	/* Array of `number_buckets` chain heads. */
	struct links_entry	**buckets;
	/*
	 * Node most recently unlinked from the table.  It is kept alive so
	 * the caller can still read it and is freed at the start of the next
	 * find_entry()/next_entry()/archive_entry_partial_links() call.
	 */
	struct links_entry	 *spare;
	/* Number of nodes currently linked into the table. */
	unsigned long		  number_entries;
	/* Size of `buckets`; used as a mask, so it stays a power of two. */
	size_t			  number_buckets;
	/* One of the ARCHIVE_ENTRY_LINKIFY_LIKE_* values. */
	int			  strategy;
};
86 
/*
 * Selection flags for next_entry():
 *   NEXT_ENTRY_DEFERRED - accept nodes that hold a deferred entry
 *                         (le->entry != NULL).
 *   NEXT_ENTRY_PARTIAL  - accept nodes without a deferred entry
 *                         (le->entry == NULL).
 *   NEXT_ENTRY_ALL      - accept either kind.
 */
#define	NEXT_ENTRY_DEFERRED	1
#define	NEXT_ENTRY_PARTIAL	2
#define	NEXT_ENTRY_ALL		(NEXT_ENTRY_DEFERRED | NEXT_ENTRY_PARTIAL)

/* Look up an entry by dev/ino, consuming one of its expected links. */
static struct links_entry *find_entry(struct archive_entry_linkresolver *,
		    struct archive_entry *);
/* Double the number of buckets and rehash; a no-op on failure. */
static void grow_hash(struct archive_entry_linkresolver *);
/* Add a new node for an entry; returns NULL if allocation fails. */
static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
		    struct archive_entry *);
/* Unlink and return the first node matching the NEXT_ENTRY_* mode. */
static struct links_entry *next_entry(struct archive_entry_linkresolver *,
    int);
98 
99 struct archive_entry_linkresolver *
100 archive_entry_linkresolver_new(void)
101 {
102 	struct archive_entry_linkresolver *res;
103 
104 	/* Check for positive power-of-two */
105 	if (links_cache_initial_size == 0 ||
106 	    (links_cache_initial_size & (links_cache_initial_size - 1)) != 0)
107 		return (NULL);
108 
109 	res = calloc(1, sizeof(struct archive_entry_linkresolver));
110 	if (res == NULL)
111 		return (NULL);
112 	res->number_buckets = links_cache_initial_size;
113 	res->buckets = calloc(res->number_buckets, sizeof(res->buckets[0]));
114 	if (res->buckets == NULL) {
115 		free(res);
116 		return (NULL);
117 	}
118 	return (res);
119 }
120 
121 void
122 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
123     int fmt)
124 {
125 	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
126 
127 	switch (fmtbase) {
128 	case ARCHIVE_FORMAT_7ZIP:
129 	case ARCHIVE_FORMAT_AR:
130 	case ARCHIVE_FORMAT_ZIP:
131 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
132 		break;
133 	case ARCHIVE_FORMAT_CPIO:
134 		switch (fmt) {
135 		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
136 		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
137 			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
138 			break;
139 		default:
140 			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
141 			break;
142 		}
143 		break;
144 	case ARCHIVE_FORMAT_MTREE:
145 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
146 		break;
147 	case ARCHIVE_FORMAT_ISO9660:
148 	case ARCHIVE_FORMAT_SHAR:
149 	case ARCHIVE_FORMAT_TAR:
150 	case ARCHIVE_FORMAT_XAR:
151 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
152 		break;
153 	default:
154 		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
155 		break;
156 	}
157 }
158 
159 void
160 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
161 {
162 	struct links_entry *le;
163 
164 	if (res == NULL)
165 		return;
166 
167 	while ((le = next_entry(res, NEXT_ENTRY_ALL)) != NULL)
168 		archive_entry_free(le->entry);
169 	free(res->buckets);
170 	free(res);
171 }
172 
173 void
174 archive_entry_linkify(struct archive_entry_linkresolver *res,
175     struct archive_entry **e, struct archive_entry **f)
176 {
177 	struct links_entry *le;
178 	struct archive_entry *t;
179 
180 	*f = NULL; /* Default: Don't return a second entry. */
181 
182 	if (*e == NULL) {
183 		le = next_entry(res, NEXT_ENTRY_DEFERRED);
184 		if (le != NULL) {
185 			*e = le->entry;
186 			le->entry = NULL;
187 		}
188 		return;
189 	}
190 
191 	/* If it has only one link, then we're done. */
192 	if (archive_entry_nlink(*e) == 1)
193 		return;
194 	/* Directories, devices never have hardlinks. */
195 	if (archive_entry_filetype(*e) == AE_IFDIR
196 	    || archive_entry_filetype(*e) == AE_IFBLK
197 	    || archive_entry_filetype(*e) == AE_IFCHR)
198 		return;
199 
200 	switch (res->strategy) {
201 	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
202 		le = find_entry(res, *e);
203 		if (le != NULL) {
204 			archive_entry_unset_size(*e);
205 			archive_entry_copy_hardlink(*e,
206 			    archive_entry_pathname(le->canonical));
207 		} else
208 			insert_entry(res, *e);
209 		return;
210 	case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
211 		le = find_entry(res, *e);
212 		if (le != NULL) {
213 			archive_entry_copy_hardlink(*e,
214 			    archive_entry_pathname(le->canonical));
215 		} else
216 			insert_entry(res, *e);
217 		return;
218 	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
219 		/* This one is trivial. */
220 		return;
221 	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
222 		le = find_entry(res, *e);
223 		if (le != NULL) {
224 			/*
225 			 * Put the new entry in le, return the
226 			 * old entry from le.
227 			 */
228 			t = *e;
229 			*e = le->entry;
230 			le->entry = t;
231 			/* Make the old entry into a hardlink. */
232 			archive_entry_unset_size(*e);
233 			archive_entry_copy_hardlink(*e,
234 			    archive_entry_pathname(le->canonical));
235 			/* If we ran out of links, return the
236 			 * final entry as well. */
237 			if (le->links == 0) {
238 				*f = le->entry;
239 				le->entry = NULL;
240 			}
241 		} else {
242 			/*
243 			 * If we haven't seen it, tuck it away
244 			 * for future use.
245 			 */
246 			le = insert_entry(res, *e);
247 			le->entry = *e;
248 			*e = NULL;
249 		}
250 		return;
251 	default:
252 		break;
253 	}
254 	return;
255 }
256 
257 static struct links_entry *
258 find_entry(struct archive_entry_linkresolver *res,
259     struct archive_entry *entry)
260 {
261 	struct links_entry	*le;
262 	size_t			 hash, bucket;
263 	dev_t			 dev;
264 	int64_t			 ino;
265 
266 	/* Free a held entry. */
267 	if (res->spare != NULL) {
268 		archive_entry_free(res->spare->canonical);
269 		archive_entry_free(res->spare->entry);
270 		free(res->spare);
271 		res->spare = NULL;
272 	}
273 
274 	dev = archive_entry_dev(entry);
275 	ino = archive_entry_ino64(entry);
276 	hash = (size_t)(dev ^ ino);
277 
278 	/* Try to locate this entry in the links cache. */
279 	bucket = hash & (res->number_buckets - 1);
280 	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
281 		if (le->hash == hash
282 		    && dev == archive_entry_dev(le->canonical)
283 		    && ino == archive_entry_ino64(le->canonical)) {
284 			/*
285 			 * Decrement link count each time and release
286 			 * the entry if it hits zero.  This saves
287 			 * memory and is necessary for detecting
288 			 * missed links.
289 			 */
290 			--le->links;
291 			if (le->links > 0)
292 				return (le);
293 			/* Remove it from this hash bucket. */
294 			if (le->previous != NULL)
295 				le->previous->next = le->next;
296 			if (le->next != NULL)
297 				le->next->previous = le->previous;
298 			if (res->buckets[bucket] == le)
299 				res->buckets[bucket] = le->next;
300 			res->number_entries--;
301 			/* Defer freeing this entry. */
302 			res->spare = le;
303 			return (le);
304 		}
305 	}
306 	return (NULL);
307 }
308 
309 static struct links_entry *
310 next_entry(struct archive_entry_linkresolver *res, int mode)
311 {
312 	struct links_entry	*le;
313 	size_t			 bucket;
314 
315 	/* Free a held entry. */
316 	if (res->spare != NULL) {
317 		archive_entry_free(res->spare->canonical);
318 		archive_entry_free(res->spare->entry);
319 		free(res->spare);
320 		res->spare = NULL;
321 	}
322 
323 	/* Look for next non-empty bucket in the links cache. */
324 	for (bucket = 0; bucket < res->number_buckets; bucket++) {
325 		for (le = res->buckets[bucket]; le != NULL; le = le->next) {
326 			if (le->entry != NULL &&
327 			    (mode & NEXT_ENTRY_DEFERRED) == 0)
328 				continue;
329 			if (le->entry == NULL &&
330 			    (mode & NEXT_ENTRY_PARTIAL) == 0)
331 				continue;
332 			/* Remove it from this hash bucket. */
333 			if (le->next != NULL)
334 				le->next->previous = le->previous;
335 			if (le->previous != NULL)
336 				le->previous->next = le->next;
337 			else
338 				res->buckets[bucket] = le->next;
339 			res->number_entries--;
340 			/* Defer freeing this entry. */
341 			res->spare = le;
342 			return (le);
343 		}
344 	}
345 	return (NULL);
346 }
347 
348 static struct links_entry *
349 insert_entry(struct archive_entry_linkresolver *res,
350     struct archive_entry *entry)
351 {
352 	struct links_entry *le;
353 	size_t hash, bucket;
354 
355 	/* Add this entry to the links cache. */
356 	le = calloc(1, sizeof(struct links_entry));
357 	if (le == NULL)
358 		return (NULL);
359 	le->canonical = archive_entry_clone(entry);
360 
361 	/* If the links cache is getting too full, enlarge the hash table. */
362 	if (res->number_entries > res->number_buckets * 2)
363 		grow_hash(res);
364 
365 	hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry);
366 	bucket = hash & (res->number_buckets - 1);
367 
368 	/* If we could allocate the entry, record it. */
369 	if (res->buckets[bucket] != NULL)
370 		res->buckets[bucket]->previous = le;
371 	res->number_entries++;
372 	le->next = res->buckets[bucket];
373 	le->previous = NULL;
374 	res->buckets[bucket] = le;
375 	le->hash = hash;
376 	le->links = archive_entry_nlink(entry) - 1;
377 	return (le);
378 }
379 
380 static void
381 grow_hash(struct archive_entry_linkresolver *res)
382 {
383 	struct links_entry *le, **new_buckets;
384 	size_t new_size;
385 	size_t i, bucket;
386 
387 	/* Try to enlarge the bucket list. */
388 	new_size = res->number_buckets * 2;
389 	if (new_size < res->number_buckets)
390 		return;
391 	new_buckets = calloc(new_size, sizeof(struct links_entry *));
392 
393 	if (new_buckets == NULL)
394 		return;
395 
396 	for (i = 0; i < res->number_buckets; i++) {
397 		while (res->buckets[i] != NULL) {
398 			/* Remove entry from old bucket. */
399 			le = res->buckets[i];
400 			res->buckets[i] = le->next;
401 
402 			/* Add entry to new bucket. */
403 			bucket = le->hash & (new_size - 1);
404 
405 			if (new_buckets[bucket] != NULL)
406 				new_buckets[bucket]->previous = le;
407 			le->next = new_buckets[bucket];
408 			le->previous = NULL;
409 			new_buckets[bucket] = le;
410 		}
411 	}
412 	free(res->buckets);
413 	res->buckets = new_buckets;
414 	res->number_buckets = new_size;
415 }
416 
417 struct archive_entry *
418 archive_entry_partial_links(struct archive_entry_linkresolver *res,
419     unsigned int *links)
420 {
421 	struct archive_entry	*e;
422 	struct links_entry	*le;
423 
424 	/* Free a held entry. */
425 	if (res->spare != NULL) {
426 		archive_entry_free(res->spare->canonical);
427 		archive_entry_free(res->spare->entry);
428 		free(res->spare);
429 		res->spare = NULL;
430 	}
431 
432 	le = next_entry(res, NEXT_ENTRY_PARTIAL);
433 	if (le != NULL) {
434 		e = le->canonical;
435 		if (links != NULL)
436 			*links = le->links;
437 		le->canonical = NULL;
438 	} else {
439 		e = NULL;
440 		if (links != NULL)
441 			*links = 0;
442 	}
443 	return (e);
444 }
445