1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_entry_link_resolver.c 201100 2009-12-28 03:05:31Z kientzle $"); 28 29 #ifdef HAVE_SYS_STAT_H 30 #include <sys/stat.h> 31 #endif 32 #ifdef HAVE_ERRNO_H 33 #include <errno.h> 34 #endif 35 #include <stdio.h> 36 #ifdef HAVE_STDLIB_H 37 #include <stdlib.h> 38 #endif 39 #ifdef HAVE_STRING_H 40 #include <string.h> 41 #endif 42 43 #include "archive.h" 44 #include "archive_entry.h" 45 46 /* 47 * This is mostly a pretty straightforward hash table implementation. 48 * The only interesting bit is the different strategies used to 49 * match up links. These strategies match those used by various 50 * archiving formats: 51 * tar - content stored with first link, remainder refer back to it. 52 * This requires us to match each subsequent link up with the 53 * first appearance. 54 * cpio - Old cpio just stored body with each link, match-ups were 55 * implicit. This is trivial. 56 * new cpio - New cpio only stores body with last link, match-ups 57 * are implicit. This is actually quite tricky; see the notes 58 * below. 59 */ 60 61 /* Users pass us a format code, we translate that into a strategy here. */ 62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 66 67 /* Initial size of link cache. */ 68 #define links_cache_initial_size 1024 69 70 struct links_entry { 71 struct links_entry *next; 72 struct links_entry *previous; 73 int links; /* # links not yet seen */ 74 int hash; 75 struct archive_entry *canonical; 76 struct archive_entry *entry; 77 }; 78 79 struct archive_entry_linkresolver { 80 struct links_entry **buckets; 81 struct links_entry *spare; 82 unsigned long number_entries; 83 size_t number_buckets; 84 int strategy; 85 }; 86 87 static struct links_entry *find_entry(struct archive_entry_linkresolver *, 88 struct archive_entry *); 89 static void grow_hash(struct archive_entry_linkresolver *); 90 static struct links_entry *insert_entry(struct archive_entry_linkresolver *, 91 struct archive_entry *); 92 static struct links_entry *next_entry(struct archive_entry_linkresolver *); 93 94 struct archive_entry_linkresolver * 95 archive_entry_linkresolver_new(void) 96 { 97 struct archive_entry_linkresolver *res; 98 size_t i; 99 100 res = malloc(sizeof(struct archive_entry_linkresolver)); 101 if (res == NULL) 102 return (NULL); 103 memset(res, 0, sizeof(struct archive_entry_linkresolver)); 104 res->number_buckets = links_cache_initial_size; 105 res->buckets = malloc(res->number_buckets * 106 sizeof(res->buckets[0])); 107 if (res->buckets == NULL) { 108 free(res); 109 return (NULL); 110 } 111 for (i = 0; i < res->number_buckets; i++) 112 res->buckets[i] = NULL; 113 return (res); 114 } 115 116 void 117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 118 int fmt) 119 { 120 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 121 122 switch (fmtbase) { 123 case ARCHIVE_FORMAT_CPIO: 124 switch (fmt) { 125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 127 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 128 break; 129 default: 130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 131 break; 132 } 133 break; 134 case ARCHIVE_FORMAT_MTREE: 135 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 136 break; 137 case ARCHIVE_FORMAT_TAR: 138 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 139 break; 140 default: 141 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 142 break; 143 } 144 } 145 146 void 147 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 148 { 149 struct links_entry *le; 150 151 if (res == NULL) 152 return; 153 154 if (res->buckets != NULL) { 155 while ((le = next_entry(res)) != NULL) 156 archive_entry_free(le->entry); 157 free(res->buckets); 158 res->buckets = NULL; 159 } 160 free(res); 161 } 162 163 void 164 archive_entry_linkify(struct archive_entry_linkresolver *res, 165 struct archive_entry **e, struct archive_entry **f) 166 { 167 struct links_entry *le; 168 struct archive_entry *t; 169 170 *f = NULL; /* Default: Don't return a second entry. */ 171 172 if (*e == NULL) { 173 le = next_entry(res); 174 if (le != NULL) { 175 *e = le->entry; 176 le->entry = NULL; 177 } 178 return; 179 } 180 181 /* If it has only one link, then we're done. */ 182 if (archive_entry_nlink(*e) == 1) 183 return; 184 /* Directories, devices never have hardlinks. */ 185 if (archive_entry_filetype(*e) == AE_IFDIR 186 || archive_entry_filetype(*e) == AE_IFBLK 187 || archive_entry_filetype(*e) == AE_IFCHR) 188 return; 189 190 switch (res->strategy) { 191 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 192 le = find_entry(res, *e); 193 if (le != NULL) { 194 archive_entry_unset_size(*e); 195 archive_entry_copy_hardlink(*e, 196 archive_entry_pathname(le->canonical)); 197 } else 198 insert_entry(res, *e); 199 return; 200 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 201 le = find_entry(res, *e); 202 if (le != NULL) { 203 archive_entry_copy_hardlink(*e, 204 archive_entry_pathname(le->canonical)); 205 } else 206 insert_entry(res, *e); 207 return; 208 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 209 /* This one is trivial. */ 210 return; 211 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 212 le = find_entry(res, *e); 213 if (le != NULL) { 214 /* 215 * Put the new entry in le, return the 216 * old entry from le. 217 */ 218 t = *e; 219 *e = le->entry; 220 le->entry = t; 221 /* Make the old entry into a hardlink. */ 222 archive_entry_unset_size(*e); 223 archive_entry_copy_hardlink(*e, 224 archive_entry_pathname(le->canonical)); 225 /* If we ran out of links, return the 226 * final entry as well. */ 227 if (le->links == 0) { 228 *f = le->entry; 229 le->entry = NULL; 230 } 231 } else { 232 /* 233 * If we haven't seen it, tuck it away 234 * for future use. 235 */ 236 le = insert_entry(res, *e); 237 le->entry = *e; 238 *e = NULL; 239 } 240 return; 241 default: 242 break; 243 } 244 return; 245 } 246 247 static struct links_entry * 248 find_entry(struct archive_entry_linkresolver *res, 249 struct archive_entry *entry) 250 { 251 struct links_entry *le; 252 int hash, bucket; 253 dev_t dev; 254 int64_t ino; 255 256 /* Free a held entry. */ 257 if (res->spare != NULL) { 258 archive_entry_free(res->spare->canonical); 259 archive_entry_free(res->spare->entry); 260 free(res->spare); 261 res->spare = NULL; 262 } 263 264 /* If the links cache overflowed and got flushed, don't bother. */ 265 if (res->buckets == NULL) 266 return (NULL); 267 268 dev = archive_entry_dev(entry); 269 ino = archive_entry_ino64(entry); 270 hash = (int)(dev ^ ino); 271 272 /* Try to locate this entry in the links cache. */ 273 bucket = hash % res->number_buckets; 274 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 275 if (le->hash == hash 276 && dev == archive_entry_dev(le->canonical) 277 && ino == archive_entry_ino64(le->canonical)) { 278 /* 279 * Decrement link count each time and release 280 * the entry if it hits zero. This saves 281 * memory and is necessary for detecting 282 * missed links. 283 */ 284 --le->links; 285 if (le->links > 0) 286 return (le); 287 /* Remove it from this hash bucket. */ 288 if (le->previous != NULL) 289 le->previous->next = le->next; 290 if (le->next != NULL) 291 le->next->previous = le->previous; 292 if (res->buckets[bucket] == le) 293 res->buckets[bucket] = le->next; 294 res->number_entries--; 295 /* Defer freeing this entry. */ 296 res->spare = le; 297 return (le); 298 } 299 } 300 return (NULL); 301 } 302 303 static struct links_entry * 304 next_entry(struct archive_entry_linkresolver *res) 305 { 306 struct links_entry *le; 307 size_t bucket; 308 309 /* Free a held entry. */ 310 if (res->spare != NULL) { 311 archive_entry_free(res->spare->canonical); 312 free(res->spare); 313 res->spare = NULL; 314 } 315 316 /* If the links cache overflowed and got flushed, don't bother. */ 317 if (res->buckets == NULL) 318 return (NULL); 319 320 /* Look for next non-empty bucket in the links cache. */ 321 for (bucket = 0; bucket < res->number_buckets; bucket++) { 322 le = res->buckets[bucket]; 323 if (le != NULL) { 324 /* Remove it from this hash bucket. */ 325 if (le->next != NULL) 326 le->next->previous = le->previous; 327 res->buckets[bucket] = le->next; 328 res->number_entries--; 329 /* Defer freeing this entry. */ 330 res->spare = le; 331 return (le); 332 } 333 } 334 return (NULL); 335 } 336 337 static struct links_entry * 338 insert_entry(struct archive_entry_linkresolver *res, 339 struct archive_entry *entry) 340 { 341 struct links_entry *le; 342 int hash, bucket; 343 344 /* Add this entry to the links cache. */ 345 le = malloc(sizeof(struct links_entry)); 346 if (le == NULL) 347 return (NULL); 348 memset(le, 0, sizeof(*le)); 349 le->canonical = archive_entry_clone(entry); 350 351 /* If the links cache is getting too full, enlarge the hash table. */ 352 if (res->number_entries > res->number_buckets * 2) 353 grow_hash(res); 354 355 hash = archive_entry_dev(entry) ^ archive_entry_ino64(entry); 356 bucket = hash % res->number_buckets; 357 358 /* If we could allocate the entry, record it. */ 359 if (res->buckets[bucket] != NULL) 360 res->buckets[bucket]->previous = le; 361 res->number_entries++; 362 le->next = res->buckets[bucket]; 363 le->previous = NULL; 364 res->buckets[bucket] = le; 365 le->hash = hash; 366 le->links = archive_entry_nlink(entry) - 1; 367 return (le); 368 } 369 370 static void 371 grow_hash(struct archive_entry_linkresolver *res) 372 { 373 struct links_entry *le, **new_buckets; 374 size_t new_size; 375 size_t i, bucket; 376 377 /* Try to enlarge the bucket list. */ 378 new_size = res->number_buckets * 2; 379 new_buckets = malloc(new_size * sizeof(struct links_entry *)); 380 381 if (new_buckets != NULL) { 382 memset(new_buckets, 0, 383 new_size * sizeof(struct links_entry *)); 384 for (i = 0; i < res->number_buckets; i++) { 385 while (res->buckets[i] != NULL) { 386 /* Remove entry from old bucket. */ 387 le = res->buckets[i]; 388 res->buckets[i] = le->next; 389 390 /* Add entry to new bucket. */ 391 bucket = le->hash % new_size; 392 393 if (new_buckets[bucket] != NULL) 394 new_buckets[bucket]->previous = 395 le; 396 le->next = new_buckets[bucket]; 397 le->previous = NULL; 398 new_buckets[bucket] = le; 399 } 400 } 401 free(res->buckets); 402 res->buckets = new_buckets; 403 res->number_buckets = new_size; 404 } 405 } 406