1 /*
2  * Copyright (c) 2008-2013 Zmanda, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with this program; if not, write to the Free Software Foundation, Inc.,
16  * 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
17  *
18  * Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
19  * Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
20  */
21 
22 #include "amanda.h"
23 #include "util.h"
24 #include "amar.h"
25 
26 /* Each block in an archive is made up of one or more records, where each
27  * record is either a header record or a data record.  The two are
28  * distinguished by the header magic string; the string 'AM' is
29  * explicitly excluded as an allowed filenum to prevent ambiguity. */
30 
/* Text that begins every header record; the writer appends ' ' and the
 * decimal version, and the reader parses it back with sscanf. */
#define HEADER_MAGIC "AMANDA ARCHIVE FORMAT"
/* The first two bytes of HEADER_MAGIC ('A' = 0x41, 'M' = 0x4d) as they appear
 * when decoded as a network-order 16-bit filenum.  A record whose filenum
 * field decodes to this value is treated as a header, so it is never
 * handed out as a real filenum. */
#define MAGIC_FILENUM 0x414d
/* Version written into new headers; the reader rejects anything higher. */
#define HEADER_VERSION 1
/* High bit of a record's 32-bit size field: set on the record that ends
 * an attribute (see MKRECORD / GETRECORD). */
#define EOA_BIT 0x80000000
35 
/* On-disk header record.  It is copied to and from the archive as raw bytes,
 * so its size is part of the format. */
typedef struct header_s {
    /* magic is HEADER_MAGIC + ' ' + decimal version, NUL padded */
    char     magic[28];
} header_t;
/* Number of bytes a header record occupies in the archive. */
#define HEADER_SIZE (SIZEOF(header_t))
41 
/* On-disk record header that precedes each record's data.  All fields are
 * stored in network byte order (see MKRECORD / GETRECORD); the field order
 * and widths are part of the format and must not change. */
typedef struct record_s {
    guint16  filenum;	/* file this record belongs to */
    guint16  attrid;	/* attribute within that file */
    guint32  size;	/* data bytes following the header; high bit is EOA_BIT */
} record_t;
/* Number of bytes a record header occupies in the archive. */
#define RECORD_SIZE (SIZEOF(record_t))
/* Largest data payload of a single record: the writer splits larger
 * buffers into several records and the reader rejects larger sizes. */
#define MAX_RECORD_DATA_SIZE (4*1024*1024)
49 
/* Serialize a record header into the RECORD_SIZE bytes at PTR.
 *
 * F and A are the filenum and attrid, S is the number of data bytes that
 * follow the header, and EOA is nonzero if this record ends the attribute
 * (it is folded into the size field as EOA_BIT).  All fields are written in
 * network byte order.
 *
 * The temporaries carry a macro-specific suffix so that arguments named
 * 'r' or 'size' at the call site are not captured by the expansion. */
#define MKRECORD(ptr, f, a, s, eoa) do { \
    record_t mkrecord_rec_; \
    uint32_t mkrecord_size_ = (s); \
    if (eoa) mkrecord_size_ |= EOA_BIT; \
    mkrecord_rec_.filenum = htons(f); \
    mkrecord_rec_.attrid = htons(a); \
    mkrecord_rec_.size = htonl(mkrecord_size_); \
    memcpy((ptr), &mkrecord_rec_, sizeof(record_t)); \
} while(0)
59 
/* Decode the record header stored at PTR into F (filenum), A (attrid),
 * S (data size with EOA_BIT stripped) and EOA (TRUE if EOA_BIT was set).
 *
 * N.B. - f, a, s, and eoa must be simple lvalues; they are assigned to, and
 * s is evaluated more than once. */
#define GETRECORD(ptr, f, a, s, eoa) do { \
    record_t getrecord_rec_; \
    memcpy(&getrecord_rec_, ptr, sizeof(record_t)); \
    s = ntohl(getrecord_rec_.size); \
    if (!(s & EOA_BIT)) { \
	eoa = FALSE; \
    } else { \
	eoa = TRUE; \
	s &= ~EOA_BIT; \
    } \
    f = ntohs(getrecord_rec_.filenum); \
    a = ntohs(getrecord_rec_.attrid); \
} while(0)
74 
/* performance knob: how much data will we buffer before just
 * writing straight out of the user's buffers?  This is also the size of the
 * buffer allocated for an archive opened for writing. */
#define WRITE_BUFFER_SIZE (512*1024)
78 
/* One entry of handling_params_t.handling_file_attr_array.
 * NOTE(review): presumably associates a specific (filenum, attrid) pair with
 * a file descriptor; the code using it is not in this part of the file. */
typedef struct amar_file_attr_handling_s {
    guint16  filenum;
    guint16  attrid;
    int      fd;
} amar_file_attr_handling_t;
84 
/* State used while reading an archive: the caller's callbacks and handling
 * tables, the per-file bookkeeping, and the read buffer. */
typedef struct handling_params_s {
    /* parameters from the user */
    gpointer user_data;	/* passed as the first argument to every callback */
    amar_attr_handling_t *handling_array; /* searched by attrid; ends at an entry with attrid 0 */
    amar_file_attr_handling_t *handling_file_attr_array;
    amar_file_start_callback_t file_start_cb;
    amar_file_finish_callback_t file_finish_cb; /* may be NULL */
    amar_done_callback_t done_cb; /* may be NULL */
    GError **error; /* where read errors are reported */

    /* tracking for open files and attributes */
    GSList *file_states; /* list of file_state_t */

    /* read buffer */
    gchar *buf;
    gsize buf_size; /* allocated size */
    gsize buf_len; /* number of active bytes .. */
    gsize buf_offset; /* ..starting at buf + buf_offset */
    gboolean got_eof; /* a read or skip came up short; no further reads are attempted */
    gboolean just_lseeked; /* did we just call lseek? */
    event_handle_t *event_read_extract; /* registered read event, or NULL when reading is stopped */
} handling_params_t;
107 
/* An open archive, either being written or being read. */
struct amar_s {
    int       fd;		/* file descriptor			*/
    mode_t    mode;		/* mode O_RDONLY or O_WRONLY		*/
    guint16   maxfilenum;	/* most recently allocated file number;
				 * incremented before each allocation	*/
    header_t  hdr;		/* pre-constructed header		*/
    off_t     position;		/* current position in the archive	*/
    GHashTable *files;		/* open amar_file_t, keyed by filenum	*/
    gboolean  seekable;		/* does lseek() work on this fd?	*/

    /* internal buffer; on writing, this is WRITE_BUFFER_SIZE bytes, and
     * always has at least RECORD_SIZE bytes free. */
    gchar *buf;
    size_t buf_len;
    size_t buf_size;
    handling_params_t *hp;	/* read-side state (callbacks, buffer)	*/
};
124 
/* A file currently open for writing within an archive. */
struct amar_file_s {
    amar_t     *archive;	/* archive for this file	*/
    off_t       size;		/* bytes written for this file, counting
				 * both record headers and record data */
    gint        filenum;	/* filenum of this file; gint is required by hash table */
    GHashTable  *attributes;	/* all attributes for this file, keyed by attrid */
};
131 
/* An attribute currently open for writing within a file. */
struct amar_attr_s {
    amar_file_t *file;		/* file for this attribute	*/
    off_t        size;		/* data bytes added to the attribute so far */
    gint         attrid;	/* id of this attribute		*/
    gboolean     wrote_eoa;	/* If the attribute is finished	*/
    /* The remaining fields carry the arguments of
     * amar_attr_add_data_fd_in_thread() to its worker thread. */
    GThread     *thread;	/* worker thread, or NULL	*/
    int          fd;		/* fd the worker reads from, else -1 */
    int          eoa;		/* eoa flag for the worker	*/
    GError     **error;		/* error location for the worker */
};
142 
143 /*
144  * Internal functions
145  */
146 
147 static gboolean amar_attr_close_no_remove(amar_attr_t *attribute, GError **error);
148 static void amar_read_cb(void *cookie);
149 
150 GQuark
amar_error_quark(void)151 amar_error_quark(void)
152 {
153     static GQuark q;
154     if (!q)
155 	q = g_quark_from_static_string("amar_error");
156     return q;
157 }
158 
159 static gboolean
flush_buffer(amar_t * archive,GError ** error)160 flush_buffer(
161 	amar_t *archive,
162 	GError **error)
163 {
164     if (archive->buf_len) {
165 	if (full_write(archive->fd, archive->buf, archive->buf_len) != archive->buf_len) {
166 	    g_set_error(error, amar_error_quark(), errno,
167 			"Error writing to amanda archive: %s", strerror(errno));
168 	    return FALSE;
169 	}
170 	archive->buf_len = 0;
171     }
172 
173     return TRUE;
174 }
175 
176 static gboolean
write_header(amar_t * archive,GError ** error)177 write_header(
178 	amar_t *archive,
179 	GError **error)
180 {
181     /* if it won't fit in the buffer, take the easy way out and flush it */
182     if (archive->buf_len + HEADER_SIZE >= WRITE_BUFFER_SIZE - RECORD_SIZE) {
183 	if (!flush_buffer(archive, error))
184 	    return FALSE;
185     }
186 
187     memcpy(archive->buf + archive->buf_len, &archive->hdr, HEADER_SIZE);
188     archive->buf_len += HEADER_SIZE;
189     archive->position += HEADER_SIZE;
190 
191     return TRUE;
192 }
193 
194 static gboolean
write_record(amar_t * archive,amar_file_t * file,guint16 attrid,gboolean eoa,gpointer data,gsize data_size,GError ** error)195 write_record(
196 	amar_t *archive,
197 	amar_file_t *file,
198 	guint16  attrid,
199 	gboolean eoa,
200 	gpointer data,
201 	gsize data_size,
202 	GError **error)
203 {
204     /* the buffer always has room for a new record header */
205     MKRECORD(archive->buf + archive->buf_len, file->filenum, attrid, data_size, eoa);
206     archive->buf_len += RECORD_SIZE;
207 
208     /* is it worth copying this record into the buffer? */
209     if (archive->buf_len + RECORD_SIZE + data_size < WRITE_BUFFER_SIZE - RECORD_SIZE) {
210 	/* yes, it is */
211 	if (data_size)
212 	    memcpy(archive->buf + archive->buf_len, data, data_size);
213 	archive->buf_len += data_size;
214     } else {
215 	/* no, it's not */
216 	struct iovec iov[2];
217 
218 	/* flush the buffer and write the new data, all in one syscall */
219 	iov[0].iov_base = archive->buf;
220 	iov[0].iov_len = archive->buf_len;
221 	iov[1].iov_base = data;
222 	iov[1].iov_len = data_size;
223 	if (full_writev(archive->fd, iov, 2) < 0) {
224 	    g_set_error(error, amar_error_quark(), errno,
225 			"Error writing to amanda archive: %s", strerror(errno));
226 	    return FALSE;
227 	}
228 	archive->buf_len = 0;
229     }
230 
231     archive->position += data_size + RECORD_SIZE;
232     file->size += data_size + RECORD_SIZE;
233     return TRUE;
234 }
235 
236 /*
237  * Public functions
238  */
239 
240 amar_t *
amar_new(int fd,mode_t mode,GError ** error)241 amar_new(
242     int       fd,
243     mode_t mode,
244     GError **error)
245 {
246     amar_t *archive = malloc(SIZEOF(amar_t));
247 
248     /* make some sanity checks first */
249     g_assert(fd >= 0);
250     g_assert(mode == O_RDONLY || mode == O_WRONLY);
251 
252     archive->fd = fd;
253     archive->mode = mode;
254     archive->maxfilenum = 0;
255     archive->position = 0;
256     archive->seekable = TRUE; /* assume seekable until lseek() fails */
257     archive->files = g_hash_table_new(g_int_hash, g_int_equal);
258     archive->buf = NULL;
259 
260     if (mode == O_WRONLY) {
261 	archive->buf = g_malloc(WRITE_BUFFER_SIZE);
262 	archive->buf_size = WRITE_BUFFER_SIZE;
263     }
264     archive->buf_len = 0;
265 
266     if (mode == O_WRONLY) {
267 	/* preformat a header with our version number */
268 	bzero(archive->hdr.magic, HEADER_SIZE);
269 	snprintf(archive->hdr.magic, HEADER_SIZE,
270 	    HEADER_MAGIC " %d", HEADER_VERSION);
271 
272 	/* and write it out to start the file */
273 	if (!write_header(archive, error)) {
274 	    amar_close(archive, NULL); /* flushing buffer won't fail */
275 	    return NULL;
276 	}
277     }
278 
279     return archive;
280 }
281 
282 gboolean
amar_close(amar_t * archive,GError ** error)283 amar_close(
284     amar_t *archive,
285     GError **error)
286 {
287     gboolean success = TRUE;
288 
289     /* verify all files are done */
290     g_assert(g_hash_table_size(archive->files) == 0);
291 
292     if (!flush_buffer(archive, error))
293 	success = FALSE;
294 
295     g_hash_table_destroy(archive->files);
296     if (archive->buf) g_free(archive->buf);
297     amfree(archive);
298 
299     return success;
300 }
301 
302 off_t
amar_size(amar_t * archive)303 amar_size(
304     amar_t *archive)
305 {
306     return archive->position;
307 }
308 
309 /*
310  * Writing
311  */
312 
313 amar_file_t *
amar_new_file(amar_t * archive,char * filename_buf,gsize filename_len,off_t * header_offset,GError ** error)314 amar_new_file(
315     amar_t *archive,
316     char *filename_buf,
317     gsize filename_len,
318     off_t *header_offset,
319     GError **error)
320 {
321     amar_file_t *file = NULL;
322 
323     g_assert(archive->mode == O_WRONLY);
324     g_assert(filename_buf != NULL);
325 
326     /* set filename_len if it wasn't specified */
327     if (!filename_len)
328 	filename_len = strlen(filename_buf);
329     g_assert(filename_len != 0);
330 
331     if (filename_len > MAX_RECORD_DATA_SIZE) {
332 	g_set_error(error, amar_error_quark(), ENOSPC,
333 		    "filename is too long for an amanda archive");
334 	return NULL;
335     }
336 
337     /* pick a new, unused filenum */
338 
339     if (g_hash_table_size(archive->files) == 65535) {
340 	g_set_error(error, amar_error_quark(), ENOSPC,
341 		    "No more file numbers available");
342 	return NULL;
343     }
344 
345     do {
346 	gint filenum;
347 
348 	archive->maxfilenum++;
349 
350 	/* MAGIC_FILENUM can't be used because it matches the header record text */
351 	if (archive->maxfilenum == MAGIC_FILENUM) {
352 	    continue;
353 	}
354 
355 	/* see if this fileid is already in use */
356 	filenum = archive->maxfilenum;
357 	if (g_hash_table_lookup(archive->files, &filenum))
358 	    continue;
359 
360     } while (0);
361 
362     file = g_new0(amar_file_t, 1);
363     file->archive = archive;
364     file->filenum = archive->maxfilenum;
365     file->size = 0;
366     file->attributes = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, g_free);
367     g_hash_table_insert(archive->files, &file->filenum, file);
368 
369     /* record the current position and write a header there, if desired */
370     if (header_offset) {
371 	*header_offset = archive->position;
372 	if (!write_header(archive, error))
373 	    goto error_exit;
374     }
375 
376     /* add a filename record */
377     if (!write_record(archive, file, AMAR_ATTR_FILENAME,
378 		      1, filename_buf, filename_len, error))
379 	goto error_exit;
380 
381     return file;
382 
383 error_exit:
384     if (file) {
385 	g_hash_table_remove(archive->files, &file->filenum);
386 	g_hash_table_destroy(file->attributes);
387 	g_free(file);
388     }
389     return NULL;
390 }
391 
392 off_t
amar_file_size(amar_file_t * file)393 amar_file_size(
394     amar_file_t *file)
395 {
396     return file->size;
397 }
398 
399 static void
foreach_attr_close(gpointer key G_GNUC_UNUSED,gpointer value,gpointer user_data)400 foreach_attr_close(
401 	gpointer key G_GNUC_UNUSED,
402 	gpointer value,
403 	gpointer user_data)
404 {
405     amar_attr_t *attr = value;
406     GError **error = user_data;
407 
408     if (attr->thread) {
409 	g_thread_join(attr->thread);
410 	attr->thread = NULL;
411     }
412 
413     /* return immediately if we've already seen an error */
414     if (*error)
415 	return;
416 
417     if (!attr->wrote_eoa) {
418 	amar_attr_close_no_remove(attr, error);
419     }
420 }
421 
422 gboolean
amar_file_close(amar_file_t * file,GError ** error)423 amar_file_close(
424     amar_file_t *file,
425     GError **error)
426 {
427     gboolean success = TRUE;
428     amar_t *archive = file->archive;
429 
430     /* close all attributes that haven't already written EOA */
431     g_hash_table_foreach(file->attributes, foreach_attr_close, error);
432     if (*error)
433 	success = FALSE;
434 
435     /* write an EOF record */
436     if (success) {
437 	if (!write_record(archive, file, AMAR_ATTR_EOF, 1,
438 			  NULL, 0, error))
439 	    success = FALSE;
440     }
441 
442     /* remove from archive->file list */
443     g_hash_table_remove(archive->files, &file->filenum);
444 
445     /* clean up */
446     g_hash_table_destroy(file->attributes);
447     amfree(file);
448 
449     return success;
450 }
451 
452 amar_attr_t *
amar_new_attr(amar_file_t * file,guint16 attrid,GError ** error G_GNUC_UNUSED)453 amar_new_attr(
454     amar_file_t *file,
455     guint16  attrid,
456     GError **error G_GNUC_UNUSED)
457 {
458     amar_attr_t *attribute;
459     gint attrid_gint = attrid;
460 
461     /* make sure this attrid isn't already present */
462     g_assert(attrid >= AMAR_ATTR_APP_START);
463     g_assert(g_hash_table_lookup(file->attributes, &attrid_gint) == NULL);
464 
465     attribute = malloc(SIZEOF(amar_attr_t));
466     attribute->file = file;
467     attribute->size = 0;
468     attribute->attrid = attrid;
469     attribute->wrote_eoa = FALSE;
470     attribute->thread = NULL;
471     attribute->fd = -1;
472     attribute->eoa = 0;
473     g_hash_table_replace(file->attributes, &attribute->attrid, attribute);
474 
475     /* (note this function cannot currently return an error) */
476 
477     return attribute;
478 }
479 
480 off_t
amar_attr_size(amar_attr_t * attr)481 amar_attr_size(
482     amar_attr_t *attr)
483 {
484     return attr->size;
485 }
486 
487 static gboolean
amar_attr_close_no_remove(amar_attr_t * attribute,GError ** error)488 amar_attr_close_no_remove(
489     amar_attr_t *attribute,
490     GError **error)
491 {
492     amar_file_t   *file    = attribute->file;
493     amar_t        *archive = file->archive;
494     gboolean rv = TRUE;
495 
496     if (attribute->thread) {
497 	g_thread_join(attribute->thread);
498 	attribute->thread = NULL;
499     }
500 
501     /* write an empty record with EOA_BIT set if we haven't ended
502      * this attribute already */
503     if (!attribute->wrote_eoa) {
504 	if (!write_record(archive, file, attribute->attrid,
505 			  1, NULL, 0, error))
506 	    rv = FALSE;
507 	attribute->wrote_eoa = TRUE;
508     }
509 
510     return rv;
511 }
512 
513 gboolean
amar_attr_close(amar_attr_t * attribute,GError ** error)514 amar_attr_close(
515     amar_attr_t *attribute,
516     GError **error)
517 {
518     amar_file_t *file = attribute->file;
519     gboolean     rv   = TRUE;
520     gint  attrid_gint = attribute->attrid;
521 
522     rv = amar_attr_close_no_remove(attribute, error);
523     g_hash_table_remove(file->attributes, &attrid_gint);
524 
525     return rv;
526 }
527 
528 gboolean
amar_attr_add_data_buffer(amar_attr_t * attribute,gpointer data,gsize size,gboolean eoa,GError ** error)529 amar_attr_add_data_buffer(
530     amar_attr_t *attribute,
531     gpointer data, gsize size,
532     gboolean eoa,
533     GError **error)
534 {
535     amar_file_t *file = attribute->file;
536     amar_t *archive = file->archive;
537 
538     g_assert(!attribute->wrote_eoa);
539 
540     /* write records until we've consumed all of the buffer */
541     while (size) {
542 	gsize rec_data_size;
543 	gboolean rec_eoa = FALSE;
544 
545 	if (size > MAX_RECORD_DATA_SIZE) {
546 	    rec_data_size = MAX_RECORD_DATA_SIZE;
547 	} else {
548 	    rec_data_size = size;
549 	    if (eoa)
550 		rec_eoa = TRUE;
551 	}
552 
553 	if (!write_record(archive, file, attribute->attrid,
554 			  rec_eoa, data, rec_data_size, error))
555 	    return FALSE;
556 
557 	data = (gchar *)data + rec_data_size;
558 	size -= rec_data_size;
559 	attribute->size += rec_data_size;
560     }
561 
562     if (eoa) {
563 	attribute->wrote_eoa = TRUE;
564     }
565 
566     return TRUE;
567 }
568 
569 static gpointer amar_attr_add_data_fd_thread(gpointer data);
570 off_t
amar_attr_add_data_fd_in_thread(amar_attr_t * attribute,int fd,gboolean eoa,GError ** error)571 amar_attr_add_data_fd_in_thread(
572     amar_attr_t *attribute,
573     int fd,
574     gboolean eoa,
575     GError **error)
576 {
577     attribute->fd = fd;
578     attribute->eoa = eoa;
579     attribute->error = error;
580     attribute->thread = g_thread_create(amar_attr_add_data_fd_thread, attribute, TRUE, NULL);
581     return 0;
582 }
583 
584 static gpointer
amar_attr_add_data_fd_thread(gpointer data)585 amar_attr_add_data_fd_thread(
586     gpointer data)
587 {
588     amar_attr_t *attribute = (amar_attr_t *)data;
589 
590     amar_attr_add_data_fd(attribute, attribute->fd, attribute->eoa, attribute->error);
591     close(attribute->fd);
592     attribute->fd = -1;
593     attribute->eoa = 0;
594     attribute->error = NULL;
595     return NULL;
596 }
597 
598 
599 off_t
amar_attr_add_data_fd(amar_attr_t * attribute,int fd,gboolean eoa,GError ** error)600 amar_attr_add_data_fd(
601     amar_attr_t *attribute,
602     int fd,
603     gboolean eoa,
604     GError **error)
605 {
606     amar_file_t   *file    = attribute->file;
607     amar_t        *archive = file->archive;
608     gssize size;
609     off_t filesize = 0;
610     gpointer buf = g_malloc(MAX_RECORD_DATA_SIZE);
611 
612     g_assert(!attribute->wrote_eoa);
613 
614     /* read and write until reaching EOF */
615     while ((size = full_read(fd, buf, MAX_RECORD_DATA_SIZE)) >= 0) {
616 	if (!write_record(archive, file, attribute->attrid,
617 			    eoa && (size < MAX_RECORD_DATA_SIZE), buf, size, error))
618 	    goto error_exit;
619 
620 	filesize += size;
621 	attribute->size += size;
622 
623 	if (size < MAX_RECORD_DATA_SIZE)
624 	    break;
625     }
626 
627     if (size < 0) {
628 	g_set_error(error, amar_error_quark(), errno,
629 		    "Error reading from fd %d: %s", fd, strerror(errno));
630 	goto error_exit;
631     }
632     g_free(buf);
633 
634     attribute->wrote_eoa = eoa;
635 
636     return filesize;
637 
638 error_exit:
639     g_free(buf);
640     return -1;
641 }
642 
643 /*
644  * Reading
645  */
646 
647 /* Note that this implementation assumes that an archive will have a "small"
648  * number of open files at any time, and a limited number of attributes for
649  * each file. */
650 
/* Read-side state of one attribute of a file that is being read. */
typedef struct attr_state_s {
    guint16  attrid;
    amar_attr_handling_t *handling; /* entry of the user's handling array used for this attribute */
    int      fd;		/* set by amar_read_to() */
    gchar *buf;			/* data accumulated until the handler's min_size is reached */
    gsize buf_len;		/* bytes currently held in buf */
    gsize buf_size;		/* allocated size of buf */
    gpointer attr_data;		/* per-attribute slot handed to the callback by address */
    gboolean wrote_eoa;		/* the callback has already been given the EOA hunk */
} attr_state_t;
661 
/* Read-side state of one file that is currently open in the archive. */
typedef struct file_state_s {
    guint16  filenum;
    gpointer file_data; /* user's data */
    gboolean ignore;	/* when set, the file-finish callback is not invoked */

    GSList *attr_states; /* list of attr_state_t for this file */
} file_state_t;
669 
670 /* buffer-handling macros and functions */
671 
672 /* Ensure that the archive buffer contains at least ATLEAST bytes.  Returns
673  * FALSE if that many bytes are not available due to EOF or another error. */
674 static gboolean
buf_atleast_(amar_t * archive,handling_params_t * hp,gsize atleast)675 buf_atleast_(
676     amar_t *archive,
677     handling_params_t *hp,
678     gsize atleast)
679 {
680     gsize to_read;
681     gsize bytes_read;
682 
683     /* easy case of hp->buf_len >= atleast is taken care of by the macro, below */
684 
685     if (hp->got_eof)
686 	return FALSE;
687 
688     /* If we just don't have space for this much data yet, then we'll have to reallocate
689      * the buffer */
690     if (hp->buf_size < atleast) {
691 	if (hp->buf_offset == 0) {
692 	    hp->buf = g_realloc(hp->buf, atleast);
693 	} else {
694 	    gpointer newbuf = g_malloc(atleast);
695 	    if (hp->buf) {
696 		memcpy(newbuf, hp->buf+hp->buf_offset, hp->buf_len);
697 		g_free(hp->buf);
698 	    }
699 	    hp->buf = newbuf;
700 	    hp->buf_offset = 0;
701 	}
702 	hp->buf_size = atleast;
703     }
704 
705     /* if we have space in this buffer to satisfy the request, but not without moving
706      * the existing data around, then move the data around */
707     else if (hp->buf_size - hp->buf_offset < atleast) {
708 	memmove(hp->buf, hp->buf+hp->buf_offset, hp->buf_len);
709 	hp->buf_offset = 0;
710     }
711 
712     /* as an optimization, if we just called lseek, then only read the requested
713      * bytes in case we're going to lseek again. */
714     if (hp->just_lseeked)
715 	to_read = atleast - hp->buf_len;
716     else
717 	to_read = hp->buf_size - hp->buf_offset - hp->buf_len;
718 
719     bytes_read = full_read(archive->fd,
720 			   hp->buf+hp->buf_offset+hp->buf_len,
721 			   to_read);
722     if (bytes_read < to_read)
723 	hp->got_eof = TRUE;
724     hp->just_lseeked = FALSE;
725 
726     hp->buf_len += bytes_read;
727 
728     return hp->buf_len >= atleast;
729 }
730 
/* Ensure HP's buffer holds at least ATLEAST bytes; evaluates to TRUE if so.
 * The common case (enough data already buffered) is answered inline and
 * everything else goes to buf_atleast_().  HP and ATLEAST are evaluated more
 * than once, so they must be free of side effects. */
#define buf_atleast(archive, hp, atleast) \
    (((hp)->buf_len >= (atleast))? TRUE : buf_atleast_((archive), (hp), (atleast)))
733 
734 /* Skip the buffer ahead by SKIPBYTES bytes.  This will discard data from the
735  * buffer, and may call lseek() if some of the skipped bytes have not yet been
736  * read.  Returns FALSE if the requisite bytes cannot be skipped due to EOF or
737  * another error. */
738 static gboolean
buf_skip_(amar_t * archive,handling_params_t * hp,gsize skipbytes)739 buf_skip_(
740     amar_t *archive,
741     handling_params_t *hp,
742     gsize skipbytes)
743 {
744     /* easy case of buf_len > skipbytes is taken care of by the macro, below,
745      * so we know we're clearing out the entire buffer here */
746 
747     skipbytes -= hp->buf_len;
748     hp->buf_len = 0;
749 
750     hp->buf_offset = 0;
751 
752 retry:
753     if (archive->seekable) {
754 	if (lseek(archive->fd, skipbytes, SEEK_CUR) < 0) {
755 	    /* did we fail because archive->fd is a pipe or something? */
756 	    if (errno == ESPIPE) {
757 		archive->seekable = FALSE;
758 		goto retry;
759 	    }
760 	    hp->got_eof = TRUE;
761 	    return FALSE;
762 	}
763     } else {
764 	while (skipbytes) {
765 	    gsize toread = MIN(skipbytes, hp->buf_size);
766 	    gsize bytes_read = full_read(archive->fd, hp->buf, toread);
767 
768 	    if (bytes_read < toread) {
769 		hp->got_eof = TRUE;
770 		return FALSE;
771 	    }
772 
773 	    skipbytes -= bytes_read;
774 	}
775     }
776 
777     return TRUE;
778 }
779 
/* Skip SKIPBYTES bytes of input; evaluates to TRUE on success.  If the bytes
 * are all in the buffer, this just advances the offset; otherwise
 * buf_skip_() does the work.  HP and SKIPBYTES are evaluated more than once,
 * so they must be free of side effects. */
#define buf_skip(archive, hp, skipbytes) \
    (((skipbytes) <= (hp)->buf_len) ? \
	((hp)->buf_len -= (skipbytes), \
	 (hp)->buf_offset += (skipbytes), \
	 TRUE) \
      : buf_skip_((archive), (hp), (skipbytes)))
786 
/* Get a pointer to the current position in the buffer, i.e. the first
 * active byte (buf + buf_offset) */
#define buf_ptr(hp) ((gchar *)(hp)->buf + (hp)->buf_offset)

/* Get the amount of data currently available in the buffer, i.e. the
 * number of active bytes starting at buf_ptr(hp) */
#define buf_avail(hp) ((hp)->buf_len)
792 
793 static gboolean
finish_attr(handling_params_t * hp,file_state_t * fs,attr_state_t * as,gboolean truncated)794 finish_attr(
795     handling_params_t *hp,
796     file_state_t *fs,
797     attr_state_t *as,
798     gboolean truncated)
799 {
800     gboolean success = TRUE;
801     if (!as->wrote_eoa && as->handling && as->handling->callback) {
802 	success = as->handling->callback(hp->user_data, fs->filenum,
803 			fs->file_data, as->attrid, as->handling->attrid_data,
804 			&as->attr_data, as->buf, as->buf_len, TRUE, truncated);
805     }
806     amfree(as->buf);
807 
808     return success;
809 }
810 
811 static gboolean
finish_file(handling_params_t * hp,file_state_t * fs,gboolean truncated)812 finish_file(
813     handling_params_t *hp,
814     file_state_t *fs,
815     gboolean truncated)
816 {
817     GSList *iter;
818     gboolean success = TRUE;
819 
820     /* free up any attributes not yet ended */
821     for (iter = fs->attr_states; iter; iter = iter->next) {
822 	attr_state_t *as = (attr_state_t *)iter->data;
823 	success = success && finish_attr(hp, fs, as, TRUE);
824     }
825     slist_free_full(fs->attr_states, g_free);
826     fs->attr_states = NULL;
827 
828     if (hp->file_finish_cb && !fs->ignore)
829 	success = success && hp->file_finish_cb(hp->user_data, fs->filenum,
830 					        &fs->file_data, truncated);
831 
832     return success;
833 }
834 
835 static gboolean
read_done(handling_params_t * hp)836 read_done(
837     handling_params_t *hp)
838 {
839     if (hp->done_cb) {
840 	return hp->done_cb(hp->user_data, *hp->error);
841     }
842     return TRUE;
843 }
844 
845 /* buffer the data and/or call the callback for this attribute */
846 static gboolean
handle_hunk(handling_params_t * hp,file_state_t * fs,attr_state_t * as,amar_attr_handling_t * hdl,gpointer buf,gsize len,gboolean eoa)847 handle_hunk(
848     handling_params_t *hp,
849     file_state_t *fs,
850     attr_state_t *as,
851     amar_attr_handling_t *hdl,
852     gpointer buf,
853     gsize len,
854     gboolean eoa)
855 {
856     gboolean success = TRUE;
857 
858     /* capture any conditions where we don't have to copy into the buffer */
859     if (hdl->min_size == 0 || (as->buf_len == 0 && len >= hdl->min_size)) {
860 	success = success && hdl->callback(hp->user_data, fs->filenum,
861 		fs->file_data, as->attrid, hdl->attrid_data, &as->attr_data,
862 		buf, len, eoa, FALSE);
863 	as->wrote_eoa = eoa;
864     } else {
865 	/* ok, copy into the buffer */
866 	if (as->buf_len + len > as->buf_size) {
867 	    gpointer newbuf = g_malloc(as->buf_len + len);
868 	    if (as->buf) {
869 		memcpy(newbuf, as->buf, as->buf_len);
870 		g_free(as->buf);
871 	    }
872 	    as->buf = newbuf;
873 	    as->buf_size = as->buf_len + len;
874 	}
875 	memcpy(as->buf + as->buf_len, buf, len);
876 	as->buf_len += len;
877 
878 	/* and call the callback if we have enough data or if this is the last attr */
879 	if (as->buf_len >= hdl->min_size || eoa) {
880 	    success = success && hdl->callback(hp->user_data, fs->filenum,
881 		    fs->file_data, as->attrid, hdl->attrid_data, &as->attr_data,
882 		    as->buf, as->buf_len, eoa, FALSE);
883 	    as->buf_len = 0;
884 	    as->wrote_eoa = eoa;
885 	}
886     }
887 
888     return success;
889 }
890 
/* Record FD on the read-side state of attribute ATTRID of file FILENUM.
 * The file state and attribute state are created (and put at the front of
 * their lists) if they do not exist yet.  A new attribute state is bound to
 * the entry of the handling array that matches ATTRID, or to the array's
 * terminating entry (attrid 0) when there is no match. */
void amar_read_to(
    amar_t   *archive,
    guint16   filenum,
    guint16   attrid,
    int       fd)
{
    handling_params_t *hp = archive->hp;
    file_state_t *fs = NULL;
    attr_state_t *as = NULL;
    GSList *node;

    /* look for an existing state for this file */
    for (node = hp->file_states; node != NULL; node = node->next) {
	file_state_t *candidate = (file_state_t *)node->data;
	if (candidate->filenum == filenum) {
	    fs = candidate;
	    break;
	}
    }

    if (fs == NULL) {
	fs = g_new0(file_state_t, 1);
	fs->filenum = filenum;
	hp->file_states = g_slist_prepend(hp->file_states, fs);
    }

    /* look for an existing state for this attribute */
    for (node = fs->attr_states; node != NULL; node = node->next) {
	attr_state_t *candidate = (attr_state_t *)node->data;
	if (candidate->attrid == attrid) {
	    as = candidate;
	    break;
	}
    }

    if (as == NULL) {
	amar_attr_handling_t *hdl = hp->handling_array;

	/* stop at the matching entry, or at the terminator */
	while (hdl->attrid != 0 && hdl->attrid != attrid)
	    hdl++;

	as = g_new0(attr_state_t, 1);
	as->attrid = attrid;
	as->handling = hdl;
	fs->attr_states = g_slist_prepend(fs->attr_states, as);
    }

    as->fd = fd;
}
937 
/* Stop event-driven reading of ARCHIVE by releasing its read event, if one
 * is registered.  Does nothing when reading is already stopped. */
void amar_stop_read(
    amar_t   *archive)
{
    handling_params_t *hp = archive->hp;

    if (hp->event_read_extract == NULL)
	return;

    event_release(hp->event_read_extract);
    hp->event_read_extract = NULL;
}
946 
/* Start event-driven reading of ARCHIVE by registering amar_read_cb for
 * read events on its fd.  Does nothing when an event is already
 * registered. */
void amar_start_read(
    amar_t   *archive)
{
    handling_params_t *hp = archive->hp;

    if (hp->event_read_extract != NULL)
	return;

    hp->event_read_extract = event_register(archive->fd, EV_READFD,
					    amar_read_cb, archive);
}
955 
956 static void
amar_read_cb(void * cookie)957 amar_read_cb(
958     void *cookie)
959 {
960     amar_t *archive = cookie;
961     ssize_t count;
962     size_t  need_bytes = 0;
963     guint16  filenum;
964     guint16  attrid;
965     guint32  datasize;
966     gboolean eoa;
967     file_state_t *fs = NULL;
968     attr_state_t *as = NULL;
969     GSList *iter;
970     amar_attr_handling_t *hdl;
971     gboolean success = TRUE;
972     handling_params_t *hp = archive->hp;
973 
974     hp = archive->hp;
975 
976     count = read(archive->fd, hp->buf + hp->buf_offset + hp->buf_len,
977 			      hp->buf_size - hp->buf_len - hp->buf_offset);
978     if (count == -1) {
979 	int save_errno = errno;
980 	g_debug("failed to read archive: %s", strerror(save_errno));
981 	g_set_error(hp->error, amar_error_quark(), save_errno,
982 			"failed to read archive: %s", strerror(save_errno));
983     }
984     hp->buf_len += count;
985 
986     /* process all complete records */
987     while (hp->buf_len >= RECORD_SIZE && hp->event_read_extract) {
988 	as = NULL;
989 	fs = NULL;
990 	GETRECORD(buf_ptr(hp), filenum, attrid, datasize, eoa);
991 	if (filenum == MAGIC_FILENUM) {
992 	    int vers;
993 
994 	    /* HEADER_RECORD */
995 	    if (hp->buf_len < HEADER_SIZE) {
996 		/* not a complete header */
997 		need_bytes = HEADER_SIZE;
998 		break;
999 	    }
1000 
1001 	    if (sscanf(buf_ptr(hp), HEADER_MAGIC " %d", &vers) != 1) {
1002 		g_set_error(hp->error, amar_error_quark(), EINVAL,
1003 			    "Invalid archive header");
1004 		read_done(archive->hp);
1005 		return;
1006 	    }
1007 
1008 	    if (vers > HEADER_VERSION) {
1009 		g_set_error(hp->error, amar_error_quark(), EINVAL,
1010 			    "Archive version %d is not supported", vers);
1011 		read_done(archive->hp);
1012 		return;
1013 	    }
1014 
1015 	    /* skip the header block */
1016 	    hp->buf_offset += HEADER_SIZE;
1017 	    hp->buf_len    -= HEADER_SIZE;
1018 	    continue; /* go to next record */
1019 
1020 	} else if (datasize > MAX_RECORD_DATA_SIZE) {
1021 	    g_set_error(hp->error, amar_error_quark(), EINVAL,
1022 			"Invalid record: data size must be less than %d",
1023 			MAX_RECORD_DATA_SIZE);
1024 	    read_done(archive->hp);
1025 	    return;
1026 
1027 	} else if (hp->buf_len < RECORD_SIZE + datasize) {
1028 	    /* not a complete record */
1029 	    need_bytes = RECORD_SIZE + datasize;
1030 	    break;
1031 	}
1032 
1033 	/* find the file_state_t, if it exists */
1034 	for (iter = hp->file_states; iter; iter = iter->next) {
1035 	    if (((file_state_t *)iter->data)->filenum == filenum) {
1036 		fs = (file_state_t *)iter->data;
1037 		break;
1038 	    }
1039 	}
1040 
1041 	/* get the "special" attributes out of the way */
1042         if (G_UNLIKELY(attrid < AMAR_ATTR_APP_START)) {
1043 	    if (attrid == AMAR_ATTR_EOF) {
1044 		if (datasize != 0) {
1045 		    g_set_error(hp->error, amar_error_quark(), EINVAL,
1046 				"Archive contains an EOF record with nonzero size");
1047 		    read_done(archive->hp);
1048 		    return;
1049 		}
1050 		hp->buf_offset += RECORD_SIZE;
1051 		hp->buf_len    -= RECORD_SIZE;
1052 		if (fs) {
1053 		    hp->file_states = g_slist_remove(hp->file_states, fs);
1054 		    success = finish_file(hp, fs, FALSE);
1055 		    g_free(fs);
1056 		    fs = NULL;
1057 		    if (!success)
1058 			break;
1059 		}
1060 		continue;
1061 	    } else if (attrid == AMAR_ATTR_FILENAME) {
1062 		if (fs) {
1063 		    /* TODO: warn - previous file did not end correctly */
1064 		    hp->file_states = g_slist_remove(hp->file_states, fs);
1065 		    success = finish_file(hp, fs, TRUE);
1066 		    g_free(fs);
1067 		    fs = NULL;
1068 		    if (!success)
1069 			break;
1070 		}
1071 
1072 		if (datasize == 0) {
1073 		    unsigned int i, nul_padding = 1;
1074 		    char *bb;
1075 		    /* try to detect NULL padding bytes */
1076 		    if (hp->buf_len < 512 - RECORD_SIZE) {
1077 			/* close to end of file */
1078 			break;
1079 		    }
1080 		    hp->buf_offset += RECORD_SIZE;
1081 		    hp->buf_len    -= RECORD_SIZE;
1082 		    bb = buf_ptr(hp);
1083 		    /* check all byte == 0 */
1084 		    for (i=0; i<512 - RECORD_SIZE; i++) {
1085 			if (*bb++ != 0)
1086 			    nul_padding = 0;
1087 		    }
1088 		    hp->buf_offset += datasize;
1089 		    hp->buf_len    -= datasize;
1090 		    if (nul_padding) {
1091 			break;
1092 		    }
1093 		    g_set_error(hp->error, amar_error_quark(), EINVAL,
1094 				"Archive file %d has an empty filename",
1095 				(int)filenum);
1096 		    read_done(archive->hp);
1097 		    return;
1098 		}
1099 
1100 		if (!eoa) {
1101 		    g_set_error(hp->error, amar_error_quark(), EINVAL,
1102 				"Filename record for fileid %d does "
1103 				"not have its EOA bit set", (int)filenum);
1104 		    hp->buf_offset += (RECORD_SIZE + datasize);
1105 		    hp->buf_len    -= (RECORD_SIZE + datasize);
1106 		    read_done(archive->hp);
1107 		    return;
1108 		}
1109 
1110 		fs = g_new0(file_state_t, 1);
1111 		fs->filenum = filenum;
1112 		hp->file_states = g_slist_prepend(hp->file_states, fs);
1113 
1114 		if (hp->file_start_cb) {
1115 		    hp->buf_offset += RECORD_SIZE;
1116 		    hp->buf_len    -= RECORD_SIZE;
1117 		    success = hp->file_start_cb(hp->user_data, filenum,
1118 				buf_ptr(hp), datasize,
1119 				&fs->ignore, &fs->file_data);
1120 		    hp->buf_offset += datasize;
1121 		    hp->buf_len    -= datasize;
1122 		    if (!success)
1123 			break;
1124 		}
1125 		continue;
1126 
1127 	    } else {
1128 		g_set_error(hp->error, amar_error_quark(), EINVAL,
1129 			    "Unknown attribute id %d in archive file %d",
1130 			    (int)attrid, (int)filenum);
1131 		read_done(archive->hp);
1132 		return;
1133 	    }
1134 	}
1135 
1136 	/* if this is an unrecognized file or a known file that's being
1137 	 * ignored, then skip it. */
1138 	if (!fs || fs->ignore) {
1139 	    hp->buf_offset += (RECORD_SIZE + datasize);
1140 	    hp->buf_len    -= (RECORD_SIZE + datasize);
1141 	    continue;
1142 	}
1143 
1144 	/* ok, this is an application attribute.  Look up its as, if it exists. */
1145 	for (iter = fs->attr_states; iter; iter = iter->next) {
1146 	    if (((attr_state_t *)(iter->data))->attrid == attrid) {
1147 		as = (attr_state_t *)(iter->data);
1148 		break;
1149 	    }
1150 	}
1151 
1152 	/* and get the proper handling for that attribute */
1153 	if (as) {
1154 	    hdl = as->handling;
1155 	} else {
1156 	    hdl = hp->handling_array;
1157 	    for (hdl = hp->handling_array; hdl->attrid != 0; hdl++) {
1158 		if (hdl->attrid == attrid)
1159 		    break;
1160 	    }
1161 	}
1162 
1163 	/* As a shortcut, if this is a one-record attribute, handle it without
1164 	 * creating a new attribute_state_t. */
1165 	if (eoa && !as) {
1166 	    gpointer tmp = NULL;
1167 	    if (hdl->callback) {
1168 		hp->buf_offset += RECORD_SIZE;
1169 		hp->buf_len    -= RECORD_SIZE;
1170 		success = hdl->callback(hp->user_data, filenum, fs->file_data, attrid,
1171 					hdl->attrid_data, &tmp, buf_ptr(hp), datasize, eoa, FALSE);
1172 		hp->buf_offset += datasize;
1173 		hp->buf_len    -= datasize;
1174 		if (!success)
1175 		    break;
1176 		continue;
1177 	    } else {
1178 		/* no callback -> just skip it */
1179 		hp->buf_offset += (RECORD_SIZE + datasize);
1180 		hp->buf_len    -= (RECORD_SIZE + datasize);
1181 		continue;
1182 	    }
1183 
1184 	}
1185 
1186 	/* ok, set up a new attribute state */
1187 	if (!as) {
1188 	    as = g_new0(attr_state_t, 1);
1189 	    as->fd = -1;
1190 	    as->attrid = attrid;
1191 	    as->handling = hdl;
1192 	    fs->attr_states = g_slist_prepend(fs->attr_states, as);
1193 	}
1194 
1195 	hp->buf_offset += RECORD_SIZE;
1196 	hp->buf_len    -= RECORD_SIZE;
1197 	if (as->fd != -1) {
1198 	    int count = full_write(as->fd, buf_ptr(hp), datasize);
1199 	    hp->buf_offset += datasize;
1200 	    hp->buf_len    -= datasize;
1201 	    if (count != (gint32)datasize)
1202 		break;
1203 	    if (eoa) {
1204 		as->wrote_eoa = eoa;
1205 	    }
1206 	} else if (hdl->callback) {
1207 	    success = handle_hunk(hp, fs, as, hdl, buf_ptr(hp), datasize, eoa);
1208 	    hp->buf_offset += datasize;
1209 	    hp->buf_len    -= datasize;
1210 	    if (!success)
1211 		break;
1212 	} else {
1213 	    hp->buf_offset += datasize;
1214 	    hp->buf_len    -= datasize;
1215 	}
1216 
1217 	/* finish the attribute if this is its last record */
1218 	if (eoa) {
1219 	    success = finish_attr(hp, fs, as, FALSE);
1220 	    fs->attr_states = g_slist_remove(fs->attr_states, as);
1221 	    g_free(as);
1222 	    as = NULL;
1223 	    if (!success)
1224 		break;
1225         }
1226     }
1227 
1228     /* increase buffer if needed */
1229     if (need_bytes > hp->buf_size) {
1230 	char *new_buf = g_malloc(need_bytes);
1231 	memcpy(new_buf, hp->buf + hp->buf_offset, hp->buf_len);
1232 	g_free(hp->buf);
1233 	hp->buf = new_buf;
1234 	hp->buf_offset = 0;
1235 	hp->buf_size = need_bytes;
1236     } else if (hp->buf_offset > 0) {
1237 	/* move data at begining of buffer */
1238 	memmove(hp->buf, hp->buf + hp->buf_offset, hp->buf_len);
1239 	hp->buf_offset = 0;
1240     }
1241 
1242     if (count == -1 || count == 0) {
1243 	if (count == 0 && hp->buf_len != 0) {
1244 	    g_set_error(hp->error, amar_error_quark(), EINVAL,
1245 			    "Archive ended with a partial record");
1246 	}
1247 	hp->got_eof = TRUE;
1248 	amar_stop_read(archive);
1249 
1250 	/* close any open files, assuming that they have been truncated */
1251 	for (iter = hp->file_states; iter; iter = iter->next) {
1252 	    file_state_t *fs = (file_state_t *)iter->data;
1253 	    finish_file(hp, fs, TRUE);
1254 	}
1255 	slist_free_full(hp->file_states, g_free);
1256 	read_done(hp);
1257 	g_free(hp->buf);
1258 	g_free(hp);
1259 	archive->hp = NULL;
1260     }
1261 }
1262 
1263 event_fn_t
set_amar_read_cb(amar_t * archive,gpointer user_data,amar_attr_handling_t * handling_array,amar_file_start_callback_t file_start_cb,amar_file_finish_callback_t file_finish_cb,amar_done_callback_t done_cb,GError ** error)1264 set_amar_read_cb(
1265 	amar_t *archive,
1266 	gpointer user_data,
1267 	amar_attr_handling_t *handling_array,
1268 	amar_file_start_callback_t file_start_cb,
1269 	amar_file_finish_callback_t file_finish_cb,
1270 	amar_done_callback_t done_cb,
1271 	GError **error)
1272 {
1273     handling_params_t *hp = g_new0(handling_params_t, 1);
1274 
1275     g_assert(archive->mode == O_RDONLY);
1276 
1277     hp->user_data = user_data;
1278     hp->handling_array = handling_array;
1279     hp->file_start_cb = file_start_cb;
1280     hp->file_finish_cb = file_finish_cb;
1281     hp->done_cb = done_cb;
1282     hp->error = error;
1283     hp->file_states = NULL;
1284     hp->buf_len = 0;
1285     hp->buf_offset = 0;
1286     hp->buf_size = 65536; /* use a 64K buffer to start */
1287     hp->buf = g_malloc(hp->buf_size);
1288     hp->got_eof = FALSE;
1289     hp->just_lseeked = FALSE;
1290     archive->hp = hp;
1291 
1292     amar_start_read(archive);
1293     return amar_read_cb;
1294 }
1295 
1296 gboolean
amar_read(amar_t * archive,gpointer user_data,amar_attr_handling_t * handling_array,amar_file_start_callback_t file_start_cb,amar_file_finish_callback_t file_finish_cb,amar_done_callback_t done_cb,GError ** error)1297 amar_read(
1298 	amar_t *archive,
1299 	gpointer user_data,
1300 	amar_attr_handling_t *handling_array,
1301 	amar_file_start_callback_t file_start_cb,
1302 	amar_file_finish_callback_t file_finish_cb,
1303 	amar_done_callback_t done_cb,
1304 	GError **error)
1305 {
1306     file_state_t *fs = NULL;
1307     attr_state_t *as = NULL;
1308     GSList *iter;
1309     handling_params_t hp;
1310     guint16  filenum;
1311     guint16  attrid;
1312     guint32  datasize;
1313     gboolean eoa;
1314     amar_attr_handling_t *hdl;
1315     gboolean success = TRUE;
1316 
1317     g_assert(archive->mode == O_RDONLY);
1318 
1319     hp.user_data = user_data;
1320     hp.handling_array = handling_array;
1321     hp.file_start_cb = file_start_cb;
1322     hp.file_finish_cb = file_finish_cb;
1323     hp.done_cb = done_cb;
1324     hp.file_states = NULL;
1325     hp.buf_len = 0;
1326     hp.buf_offset = 0;
1327     hp.buf_size = 1024; /* use a 1K buffer to start */
1328     hp.buf = g_malloc(hp.buf_size);
1329     hp.got_eof = FALSE;
1330     hp.just_lseeked = FALSE;
1331 
1332     /* check that we are starting at a header record, but don't advance
1333      * the buffer past it */
1334     if (buf_atleast(archive, &hp, RECORD_SIZE)) {
1335 	GETRECORD(buf_ptr(&hp), filenum, attrid, datasize, eoa);
1336 	if (filenum != MAGIC_FILENUM) {
1337 	    g_set_error(error, amar_error_quark(), EINVAL,
1338 			"Archive read does not begin at a header record");
1339 	    return FALSE;
1340 	}
1341     }
1342 
1343     while (1) {
1344 	if (!buf_atleast(archive, &hp, RECORD_SIZE))
1345 	    break;
1346 
1347 	GETRECORD(buf_ptr(&hp), filenum, attrid, datasize, eoa);
1348 
1349 	/* handle headers specially */
1350 	if (G_UNLIKELY(filenum == MAGIC_FILENUM)) {
1351 	    int vers;
1352 
1353 	    /* bail if an EOF occurred in the middle of the header */
1354 	    if (!buf_atleast(archive, &hp, HEADER_SIZE))
1355 		break;
1356 
1357 	    if (sscanf(buf_ptr(&hp), HEADER_MAGIC " %d", &vers) != 1) {
1358 		g_set_error(error, amar_error_quark(), EINVAL,
1359 			    "Invalid archive header");
1360 		return FALSE;
1361 	    }
1362 
1363 	    if (vers > HEADER_VERSION) {
1364 		g_set_error(error, amar_error_quark(), EINVAL,
1365 			    "Archive version %d is not supported", vers);
1366 		return FALSE;
1367 	    }
1368 
1369 	    buf_skip(archive, &hp, HEADER_SIZE);
1370 
1371 	    continue;
1372 	}
1373 
1374 	buf_skip(archive, &hp, RECORD_SIZE);
1375 
1376 	if (datasize > MAX_RECORD_DATA_SIZE) {
1377 	    g_set_error(error, amar_error_quark(), EINVAL,
1378 			"Invalid record: data size must be less than %d",
1379 			MAX_RECORD_DATA_SIZE);
1380 	    return FALSE;
1381 	}
1382 
1383 	/* find the file_state_t, if it exists */
1384 	if (!fs || fs->filenum != filenum) {
1385 	    fs = NULL;
1386 	    for (iter = hp.file_states; iter; iter = iter->next) {
1387 		if (((file_state_t *)iter->data)->filenum == filenum) {
1388 		    fs = (file_state_t *)iter->data;
1389 		    break;
1390 		}
1391 	    }
1392 	}
1393 
1394 	/* get the "special" attributes out of the way */
1395 	if (G_UNLIKELY(attrid < AMAR_ATTR_APP_START)) {
1396 	    if (attrid == AMAR_ATTR_EOF) {
1397 		if (datasize != 0) {
1398 		    g_set_error(error, amar_error_quark(), EINVAL,
1399 				"Archive contains an EOF record with nonzero size");
1400 		    return FALSE;
1401 		}
1402 		if (fs) {
1403 		    success = finish_file(&hp, fs, FALSE);
1404 		    hp.file_states = g_slist_remove(hp.file_states, fs);
1405 		    as = NULL;
1406 		    g_free(fs);
1407 		    fs = NULL;
1408 		    if (!success)
1409 			break;
1410 		}
1411 		continue;
1412 	    } else if (attrid == AMAR_ATTR_FILENAME) {
1413 		/* for filenames, we need the whole filename in the buffer */
1414 		if (!buf_atleast(archive, &hp, datasize))
1415 		    break;
1416 
1417 		if (fs) {
1418 		    /* TODO: warn - previous file did not end correctly */
1419 		    success = finish_file(&hp, fs, TRUE);
1420 		    hp.file_states = g_slist_remove(hp.file_states, fs);
1421 		    as = NULL;
1422 		    g_free(fs);
1423 		    fs = NULL;
1424 		    if (!success)
1425 			break;
1426 		}
1427 
1428 		if (!datasize) {
1429 		    unsigned int i, nul_padding = 1;
1430 		    char *bb;
1431 		    /* try to detect NULL padding bytes */
1432 		    if (!buf_atleast(archive, &hp, 512 - RECORD_SIZE)) {
1433 			/* close to end of file */
1434 			break;
1435 		    }
1436 		    bb = buf_ptr(&hp);
1437 		    /* check all byte == 0 */
1438 		    for (i=0; i<512 - RECORD_SIZE; i++) {
1439 			if (*bb++ != 0)
1440 			    nul_padding = 0;
1441 		    }
1442 		    if (nul_padding) {
1443 			break;
1444 		    }
1445 		    g_set_error(error, amar_error_quark(), EINVAL,
1446 				"Archive file %d has an empty filename",
1447 				(int)filenum);
1448 		    return FALSE;
1449 		}
1450 
1451 		if (!eoa) {
1452 		    g_set_error(error, amar_error_quark(), EINVAL,
1453 				"Filename record for fileid %d does "
1454 				"not have its EOA bit set", (int)filenum);
1455 		    return FALSE;
1456 		}
1457 
1458 		fs = g_new0(file_state_t, 1);
1459 		fs->filenum = filenum;
1460 		hp.file_states = g_slist_prepend(hp.file_states, fs);
1461 
1462 		if (hp.file_start_cb) {
1463 		    success = hp.file_start_cb(hp.user_data, filenum,
1464 			    buf_ptr(&hp), datasize,
1465 			    &fs->ignore, &fs->file_data);
1466 		    if (!success)
1467 			break;
1468 		}
1469 
1470 		buf_skip(archive, &hp, datasize);
1471 
1472 		continue;
1473 	    } else {
1474 		g_set_error(error, amar_error_quark(), EINVAL,
1475 			    "Unknown attribute id %d in archive file %d",
1476 			    (int)attrid, (int)filenum);
1477 		return FALSE;
1478 	    }
1479 	}
1480 
1481 	/* if this is an unrecognized file or a known file that's being
1482 	 * ignored, then skip it. */
1483 	if (!fs || fs->ignore) {
1484 	    buf_skip(archive, &hp, datasize);
1485 	    continue;
1486 	}
1487 
1488 	/* ok, this is an application attribute.  Look up its as, if it exists. */
1489 	if (!as || as->attrid != attrid) {
1490 	    as = NULL;
1491 	    for (iter = fs->attr_states; iter; iter = iter->next) {
1492 		if (((attr_state_t *)(iter->data))->attrid == attrid) {
1493 		    as = (attr_state_t *)(iter->data);
1494 		    break;
1495 		}
1496 	    }
1497 	}
1498 
1499 	/* and get the proper handling for that attribute */
1500 	if (as) {
1501 	    hdl = as->handling;
1502 	} else {
1503 	    hdl = hp.handling_array;
1504 	    for (hdl = hp.handling_array; hdl->attrid != 0; hdl++) {
1505 		if (hdl->attrid == attrid)
1506 		    break;
1507 	    }
1508 	}
1509 
1510 	/* As a shortcut, if this is a one-record attribute, handle it without
1511 	 * creating a new attribute_state_t. */
1512 	if (eoa && !as) {
1513 	    gpointer tmp = NULL;
1514 	    if (hdl->callback) {
1515 		/* a simple single-part callback */
1516 		if (buf_avail(&hp) >= datasize) {
1517 		    success = hdl->callback(hp.user_data, filenum, fs->file_data, attrid,
1518 			    hdl->attrid_data, &tmp, buf_ptr(&hp), datasize, eoa, FALSE);
1519 		    if (!success)
1520 			break;
1521 		    buf_skip(archive, &hp, datasize);
1522 		    continue;
1523 		}
1524 
1525 		/* we only have part of the data, but if it's big enough to exceed
1526 		 * the attribute's min_size, then just call the callback for each
1527 		 * part of the data */
1528 		else if (buf_avail(&hp) >= hdl->min_size) {
1529 		    gsize firstpart = buf_avail(&hp);
1530 		    gsize lastpart = datasize - firstpart;
1531 
1532 		    success = hdl->callback(hp.user_data, filenum, fs->file_data, attrid,
1533 			    hdl->attrid_data, &tmp, buf_ptr(&hp), firstpart, FALSE, FALSE);
1534 		    if (!success)
1535 			break;
1536 		    buf_skip(archive, &hp, firstpart);
1537 
1538 		    if (!buf_atleast(archive, &hp, lastpart))
1539 			break;
1540 
1541 		    success = hdl->callback(hp.user_data, filenum, fs->file_data, attrid,
1542 			    hdl->attrid_data, &tmp, buf_ptr(&hp), lastpart, eoa, FALSE);
1543 		    if (!success)
1544 			break;
1545 		    buf_skip(archive, &hp, lastpart);
1546 		    continue;
1547 		}
1548 	    } else {
1549 		/* no callback -> just skip it */
1550 		buf_skip(archive, &hp, datasize);
1551 		continue;
1552 	    }
1553 	}
1554 
1555 	/* ok, set up a new attribute state */
1556 	if (!as) {
1557 	    as = g_new0(attr_state_t, 1);
1558 	    as->attrid = attrid;
1559 	    as->handling = hdl;
1560 	    fs->attr_states = g_slist_prepend(fs->attr_states, as);
1561 	}
1562 
1563 	if (hdl->callback) {
1564 	    /* handle the data as one or two hunks, depending on whether it's
1565 	     * all in the buffer right now */
1566 	    if (buf_avail(&hp) >= datasize) {
1567 		success = handle_hunk(&hp, fs, as, hdl, buf_ptr(&hp), datasize, eoa);
1568 		if (!success)
1569 		    break;
1570 		buf_skip(archive, &hp, datasize);
1571 	    } else {
1572 		gsize hunksize = buf_avail(&hp);
1573 		success = handle_hunk(&hp, fs, as, hdl, buf_ptr(&hp), hunksize, FALSE);
1574 		if (!success)
1575 		    break;
1576 		buf_skip(archive, &hp, hunksize);
1577 
1578 		hunksize = datasize - hunksize;
1579 		if (!buf_atleast(archive, &hp, hunksize))
1580 		    break;
1581 
1582 		handle_hunk(&hp, fs, as, hdl, buf_ptr(&hp), hunksize, eoa);
1583 		buf_skip(archive, &hp, hunksize);
1584 	    }
1585 	} else {
1586 	    buf_skip(archive, &hp, datasize);
1587 	}
1588 
1589 	/* finish the attribute if this is its last record */
1590 	if (eoa) {
1591 	    success = finish_attr(&hp, fs, as, FALSE);
1592 	    fs->attr_states = g_slist_remove(fs->attr_states, as);
1593 	    g_free(as);
1594 	    as = NULL;
1595 	    if (!success)
1596 		break;
1597 	}
1598     }
1599 
1600     /* close any open files, assuming that they have been truncated */
1601 
1602     for (iter = hp.file_states; iter; iter = iter->next) {
1603 	file_state_t *fs = (file_state_t *)iter->data;
1604 	finish_file(&hp, fs, TRUE);
1605     }
1606     slist_free_full(hp.file_states, g_free);
1607     g_free(hp.buf);
1608 
1609     return success;
1610 }
1611 
/*
 * Report MSG as an EINVAL error on the read currently attached to ARCHIVE,
 * then stop reading and run the read-completion handling.
 *
 * archive->hp is dereferenced unconditionally, so this must only be called
 * while the archive has handling parameters attached.
 */
void amar_set_error(
    amar_t *archive,
    char *msg)
{
    /* g_set_error() builds its own formatted copy of the message, so msg is
     * passed as-is; duplicating it first would leak the duplicate */
    g_set_error(archive->hp->error, amar_error_quark(), EINVAL, "%s", msg);
    amar_stop_read(archive);
    read_done(archive->hp);
}
1621