1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.4 2008/09/05 06:15:25 kientzle Exp $"); 28 29 #ifdef HAVE_SYS_STAT_H 30 #include <sys/stat.h> 31 #endif 32 #ifdef HAVE_ERRNO_H 33 #include <errno.h> 34 #endif 35 #include <stdio.h> 36 #ifdef HAVE_STDLIB_H 37 #include <stdlib.h> 38 #endif 39 #ifdef HAVE_STRING_H 40 #include <string.h> 41 #endif 42 43 #include "archive.h" 44 #include "archive_entry.h" 45 46 /* 47 * This is mostly a pretty straightforward hash table implementation. 48 * The only interesting bit is the different strategies used to 49 * match up links. These strategies match those used by various 50 * archiving formats: 51 * tar - content stored with first link, remainder refer back to it. 52 * This requires us to match each subsequent link up with the 53 * first appearance. 54 * cpio - Old cpio just stored body with each link, match-ups were 55 * implicit. This is trivial. 56 * new cpio - New cpio only stores body with last link, match-ups 57 * are implicit. This is actually quite tricky; see the notes 58 * below. 59 */ 60 61 /* Users pass us a format code, we translate that into a strategy here. */ 62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1 64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2 65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3 66 67 /* Initial size of link cache. */ 68 #define links_cache_initial_size 1024 69 70 struct links_entry { 71 struct links_entry *next; 72 struct links_entry *previous; 73 int links; /* # links not yet seen */ 74 int hash; 75 struct archive_entry *canonical; 76 struct archive_entry *entry; 77 }; 78 79 struct archive_entry_linkresolver { 80 struct links_entry **buckets; 81 struct links_entry *spare; 82 unsigned long number_entries; 83 size_t number_buckets; 84 int strategy; 85 }; 86 87 static struct links_entry *find_entry(struct archive_entry_linkresolver *, 88 struct archive_entry *); 89 static void grow_hash(struct archive_entry_linkresolver *); 90 static struct links_entry *insert_entry(struct archive_entry_linkresolver *, 91 struct archive_entry *); 92 static struct links_entry *next_entry(struct archive_entry_linkresolver *); 93 94 struct archive_entry_linkresolver * 95 archive_entry_linkresolver_new(void) 96 { 97 struct archive_entry_linkresolver *res; 98 size_t i; 99 100 res = malloc(sizeof(struct archive_entry_linkresolver)); 101 if (res == NULL) 102 return (NULL); 103 memset(res, 0, sizeof(struct archive_entry_linkresolver)); 104 res->number_buckets = links_cache_initial_size; 105 res->buckets = malloc(res->number_buckets * 106 sizeof(res->buckets[0])); 107 if (res->buckets == NULL) { 108 free(res); 109 return (NULL); 110 } 111 for (i = 0; i < res->number_buckets; i++) 112 res->buckets[i] = NULL; 113 return (res); 114 } 115 116 void 117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, 118 int fmt) 119 { 120 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; 121 122 switch (fmtbase) { 123 case ARCHIVE_FORMAT_CPIO: 124 switch (fmt) { 125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: 126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC: 127 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; 128 break; 129 default: 130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; 131 break; 132 } 133 break; 134 case ARCHIVE_FORMAT_MTREE: 135 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE; 136 break; 137 case ARCHIVE_FORMAT_TAR: 138 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 139 break; 140 default: 141 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; 142 break; 143 } 144 } 145 146 void 147 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) 148 { 149 struct links_entry *le; 150 151 if (res == NULL) 152 return; 153 154 if (res->buckets != NULL) { 155 while ((le = next_entry(res)) != NULL) 156 archive_entry_free(le->entry); 157 free(res->buckets); 158 res->buckets = NULL; 159 } 160 free(res); 161 } 162 163 void 164 archive_entry_linkify(struct archive_entry_linkresolver *res, 165 struct archive_entry **e, struct archive_entry **f) 166 { 167 struct links_entry *le; 168 struct archive_entry *t; 169 170 *f = NULL; /* Default: Don't return a second entry. */ 171 172 if (*e == NULL) { 173 le = next_entry(res); 174 if (le != NULL) { 175 *e = le->entry; 176 le->entry = NULL; 177 } 178 return; 179 } 180 181 /* If it has only one link, then we're done. */ 182 if (archive_entry_nlink(*e) == 1) 183 return; 184 /* Directories never have hardlinks. */ 185 if (archive_entry_filetype(*e) == AE_IFDIR) 186 return; 187 188 switch (res->strategy) { 189 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: 190 le = find_entry(res, *e); 191 if (le != NULL) { 192 archive_entry_unset_size(*e); 193 archive_entry_copy_hardlink(*e, 194 archive_entry_pathname(le->canonical)); 195 } else 196 insert_entry(res, *e); 197 return; 198 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE: 199 le = find_entry(res, *e); 200 if (le != NULL) { 201 archive_entry_copy_hardlink(*e, 202 archive_entry_pathname(le->canonical)); 203 } else 204 insert_entry(res, *e); 205 return; 206 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: 207 /* This one is trivial. */ 208 return; 209 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: 210 le = find_entry(res, *e); 211 if (le != NULL) { 212 /* 213 * Put the new entry in le, return the 214 * old entry from le. 215 */ 216 t = *e; 217 *e = le->entry; 218 le->entry = t; 219 /* Make the old entry into a hardlink. */ 220 archive_entry_unset_size(*e); 221 archive_entry_copy_hardlink(*e, 222 archive_entry_pathname(le->canonical)); 223 /* If we ran out of links, return the 224 * final entry as well. */ 225 if (le->links == 0) { 226 *f = le->entry; 227 le->entry = NULL; 228 } 229 } else { 230 /* 231 * If we haven't seen it, tuck it away 232 * for future use. 233 */ 234 le = insert_entry(res, *e); 235 le->entry = *e; 236 *e = NULL; 237 } 238 return; 239 default: 240 break; 241 } 242 return; 243 } 244 245 static struct links_entry * 246 find_entry(struct archive_entry_linkresolver *res, 247 struct archive_entry *entry) 248 { 249 struct links_entry *le; 250 int hash, bucket; 251 dev_t dev; 252 ino_t ino; 253 254 /* Free a held entry. */ 255 if (res->spare != NULL) { 256 archive_entry_free(res->spare->canonical); 257 archive_entry_free(res->spare->entry); 258 free(res->spare); 259 res->spare = NULL; 260 } 261 262 /* If the links cache overflowed and got flushed, don't bother. */ 263 if (res->buckets == NULL) 264 return (NULL); 265 266 dev = archive_entry_dev(entry); 267 ino = archive_entry_ino(entry); 268 hash = dev ^ ino; 269 270 /* Try to locate this entry in the links cache. */ 271 bucket = hash % res->number_buckets; 272 for (le = res->buckets[bucket]; le != NULL; le = le->next) { 273 if (le->hash == hash 274 && dev == archive_entry_dev(le->canonical) 275 && ino == archive_entry_ino(le->canonical)) { 276 /* 277 * Decrement link count each time and release 278 * the entry if it hits zero. This saves 279 * memory and is necessary for detecting 280 * missed links. 281 */ 282 --le->links; 283 if (le->links > 0) 284 return (le); 285 /* Remove it from this hash bucket. */ 286 if (le->previous != NULL) 287 le->previous->next = le->next; 288 if (le->next != NULL) 289 le->next->previous = le->previous; 290 if (res->buckets[bucket] == le) 291 res->buckets[bucket] = le->next; 292 res->number_entries--; 293 /* Defer freeing this entry. */ 294 res->spare = le; 295 return (le); 296 } 297 } 298 return (NULL); 299 } 300 301 static struct links_entry * 302 next_entry(struct archive_entry_linkresolver *res) 303 { 304 struct links_entry *le; 305 size_t bucket; 306 307 /* Free a held entry. */ 308 if (res->spare != NULL) { 309 archive_entry_free(res->spare->canonical); 310 free(res->spare); 311 res->spare = NULL; 312 } 313 314 /* If the links cache overflowed and got flushed, don't bother. */ 315 if (res->buckets == NULL) 316 return (NULL); 317 318 /* Look for next non-empty bucket in the links cache. */ 319 for (bucket = 0; bucket < res->number_buckets; bucket++) { 320 le = res->buckets[bucket]; 321 if (le != NULL) { 322 /* Remove it from this hash bucket. */ 323 if (le->next != NULL) 324 le->next->previous = le->previous; 325 res->buckets[bucket] = le->next; 326 res->number_entries--; 327 /* Defer freeing this entry. */ 328 res->spare = le; 329 return (le); 330 } 331 } 332 return (NULL); 333 } 334 335 static struct links_entry * 336 insert_entry(struct archive_entry_linkresolver *res, 337 struct archive_entry *entry) 338 { 339 struct links_entry *le; 340 int hash, bucket; 341 342 /* Add this entry to the links cache. */ 343 le = malloc(sizeof(struct links_entry)); 344 if (le == NULL) 345 return (NULL); 346 memset(le, 0, sizeof(*le)); 347 le->canonical = archive_entry_clone(entry); 348 349 /* If the links cache is getting too full, enlarge the hash table. */ 350 if (res->number_entries > res->number_buckets * 2) 351 grow_hash(res); 352 353 hash = archive_entry_dev(entry) ^ archive_entry_ino(entry); 354 bucket = hash % res->number_buckets; 355 356 /* If we could allocate the entry, record it. */ 357 if (res->buckets[bucket] != NULL) 358 res->buckets[bucket]->previous = le; 359 res->number_entries++; 360 le->next = res->buckets[bucket]; 361 le->previous = NULL; 362 res->buckets[bucket] = le; 363 le->hash = hash; 364 le->links = archive_entry_nlink(entry) - 1; 365 return (le); 366 } 367 368 static void 369 grow_hash(struct archive_entry_linkresolver *res) 370 { 371 struct links_entry *le, **new_buckets; 372 size_t new_size; 373 size_t i, bucket; 374 375 /* Try to enlarge the bucket list. */ 376 new_size = res->number_buckets * 2; 377 new_buckets = malloc(new_size * sizeof(struct links_entry *)); 378 379 if (new_buckets != NULL) { 380 memset(new_buckets, 0, 381 new_size * sizeof(struct links_entry *)); 382 for (i = 0; i < res->number_buckets; i++) { 383 while (res->buckets[i] != NULL) { 384 /* Remove entry from old bucket. */ 385 le = res->buckets[i]; 386 res->buckets[i] = le->next; 387 388 /* Add entry to new bucket. */ 389 bucket = le->hash % new_size; 390 391 if (new_buckets[bucket] != NULL) 392 new_buckets[bucket]->previous = 393 le; 394 le->next = new_buckets[bucket]; 395 le->previous = NULL; 396 new_buckets[bucket] = le; 397 } 398 } 399 free(res->buckets); 400 res->buckets = new_buckets; 401 res->number_buckets = new_size; 402 } 403 } 404