/*-
 * Copyright (c) 2014 Sebastian Freundt
 * Author: Sebastian Freundt  <devel@fresse.org>
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27cdf63a70SMartin Matuska 
28cdf63a70SMartin Matuska #include "archive_platform.h"
29cdf63a70SMartin Matuska 
30cdf63a70SMartin Matuska #ifdef HAVE_ERRNO_H
31cdf63a70SMartin Matuska #include <errno.h>
32cdf63a70SMartin Matuska #endif
33cdf63a70SMartin Matuska #include <stdio.h>
34cdf63a70SMartin Matuska #ifdef HAVE_STDLIB_H
35cdf63a70SMartin Matuska #include <stdlib.h>
36cdf63a70SMartin Matuska #endif
37cdf63a70SMartin Matuska #ifdef HAVE_STRING_H
38cdf63a70SMartin Matuska #include <string.h>
39cdf63a70SMartin Matuska #endif
40cdf63a70SMartin Matuska #ifdef HAVE_TIME_H
41cdf63a70SMartin Matuska #include <time.h>
42cdf63a70SMartin Matuska #endif
43cdf63a70SMartin Matuska 
44cdf63a70SMartin Matuska #include "archive.h"
45cdf63a70SMartin Matuska #include "archive_entry.h"
46cdf63a70SMartin Matuska #include "archive_entry_locale.h"
47cdf63a70SMartin Matuska #include "archive_private.h"
48cdf63a70SMartin Matuska #include "archive_random_private.h"
49cdf63a70SMartin Matuska #include "archive_write_private.h"
50f9762417SMartin Matuska #include "archive_write_set_format_private.h"
51cdf63a70SMartin Matuska 
/* Per-archive state of the WARC writer, stored in a->format_data. */
struct warc_s {
	/* non-zero once the file-wide warcinfo record must not (or no
	 * longer) be emitted: set by the "omit-warcinfo" option and
	 * after the record has been handled by the header callback */
	unsigned int omit_warcinfo:1;

	/* wall-clock time sampled when the format was selected;
	 * used for the date fields of emitted records */
	time_t now;
	/* file type of the entry currently being written, 0 if none */
	mode_t typ;
	/* seeded from `now` when the format is selected */
	unsigned int rng;
	/* content size announced in the current entry's header;
	 * bounds how much payload the data callback will write */
	uint64_t populz;
};
61cdf63a70SMartin Matuska 
62cdf63a70SMartin Matuska static const char warcinfo[] =
63cdf63a70SMartin Matuska     "software: libarchive/" ARCHIVE_VERSION_ONLY_STRING "\r\n"
64cdf63a70SMartin Matuska     "format: WARC file version 1.0\r\n";
65cdf63a70SMartin Matuska 
/*
 * WARC record types.  Only the first three real types (warcinfo,
 * metadata, resource) have a name in the header writer's type table;
 * the remaining ones are listed for completeness.
 */
typedef enum {
	WT_NONE,	/* no type */
	WT_INFO,	/* warcinfo */
	WT_META,	/* metadata */
	WT_RSRC,	/* resource */
	WT_REQ,		/* request, unsupported */
	WT_RSP,		/* response, unsupported */
	WT_RVIS,	/* revisit, unsupported */
	WT_CONV,	/* conversion, unsupported */
	WT_CONT,	/* continuation, unsupported at the moment */
	LAST_WT		/* invalid type; also the number of types */
} warc_type_t;
87cdf63a70SMartin Matuska 
88cdf63a70SMartin Matuska typedef struct {
89cdf63a70SMartin Matuska 	warc_type_t type;
90cdf63a70SMartin Matuska 	const char *tgturi;
91cdf63a70SMartin Matuska 	const char *recid;
92cdf63a70SMartin Matuska 	time_t rtime;
93cdf63a70SMartin Matuska 	time_t mtime;
94cdf63a70SMartin Matuska 	const char *cnttyp;
95cdf63a70SMartin Matuska 	uint64_t cntlen;
96cdf63a70SMartin Matuska } warc_essential_hdr_t;
97cdf63a70SMartin Matuska 
/* Raw storage for one UUID, kept as four unsigned words. */
typedef struct {
	unsigned int u[4U];
} warc_uuid_t;
101cdf63a70SMartin Matuska 
102cdf63a70SMartin Matuska static int _warc_options(struct archive_write*, const char *key, const char *v);
103cdf63a70SMartin Matuska static int _warc_header(struct archive_write *a, struct archive_entry *entry);
104cdf63a70SMartin Matuska static ssize_t _warc_data(struct archive_write *a, const void *buf, size_t sz);
105cdf63a70SMartin Matuska static int _warc_finish_entry(struct archive_write *a);
106cdf63a70SMartin Matuska static int _warc_close(struct archive_write *a);
107cdf63a70SMartin Matuska static int _warc_free(struct archive_write *a);
108cdf63a70SMartin Matuska 
109cdf63a70SMartin Matuska /* private routines */
110cdf63a70SMartin Matuska static ssize_t _popul_ehdr(struct archive_string *t, size_t z, warc_essential_hdr_t);
111cdf63a70SMartin Matuska static int _gen_uuid(warc_uuid_t *tgt);
112cdf63a70SMartin Matuska 
113cdf63a70SMartin Matuska 
114cdf63a70SMartin Matuska /*
115cdf63a70SMartin Matuska  * Set output format to ISO 28500 (aka WARC) format.
116cdf63a70SMartin Matuska  */
117cdf63a70SMartin Matuska int
archive_write_set_format_warc(struct archive * _a)118cdf63a70SMartin Matuska archive_write_set_format_warc(struct archive *_a)
119cdf63a70SMartin Matuska {
120cdf63a70SMartin Matuska 	struct archive_write *a = (struct archive_write *)_a;
121cdf63a70SMartin Matuska 	struct warc_s *w;
122cdf63a70SMartin Matuska 
123cdf63a70SMartin Matuska 	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
124cdf63a70SMartin Matuska 	    ARCHIVE_STATE_NEW, "archive_write_set_format_warc");
125cdf63a70SMartin Matuska 
126cdf63a70SMartin Matuska 	/* If another format was already registered, unregister it. */
127cdf63a70SMartin Matuska 	if (a->format_free != NULL) {
128cdf63a70SMartin Matuska 		(a->format_free)(a);
129cdf63a70SMartin Matuska 	}
130cdf63a70SMartin Matuska 
131cdf63a70SMartin Matuska 	w = malloc(sizeof(*w));
132cdf63a70SMartin Matuska 	if (w == NULL) {
133cdf63a70SMartin Matuska 		archive_set_error(&a->archive, ENOMEM,
134cdf63a70SMartin Matuska 		    "Can't allocate warc data");
135cdf63a70SMartin Matuska 		return (ARCHIVE_FATAL);
136cdf63a70SMartin Matuska 	}
137cdf63a70SMartin Matuska 	/* by default we're emitting a file wide header */
138cdf63a70SMartin Matuska 	w->omit_warcinfo = 0U;
139cdf63a70SMartin Matuska 	/* obtain current time for date fields */
140cdf63a70SMartin Matuska 	w->now = time(NULL);
141cdf63a70SMartin Matuska 	/* reset file type info */
142cdf63a70SMartin Matuska 	w->typ = 0;
143cdf63a70SMartin Matuska 	/* also initialise our rng */
144cdf63a70SMartin Matuska 	w->rng = (unsigned int)w->now;
145cdf63a70SMartin Matuska 
146cdf63a70SMartin Matuska 	a->format_data = w;
147cdf63a70SMartin Matuska 	a->format_name = "WARC/1.0";
148cdf63a70SMartin Matuska 	a->format_options = _warc_options;
149cdf63a70SMartin Matuska 	a->format_write_header = _warc_header;
150cdf63a70SMartin Matuska 	a->format_write_data = _warc_data;
151cdf63a70SMartin Matuska 	a->format_close = _warc_close;
152cdf63a70SMartin Matuska 	a->format_free = _warc_free;
153cdf63a70SMartin Matuska 	a->format_finish_entry = _warc_finish_entry;
154cdf63a70SMartin Matuska 	a->archive.archive_format = ARCHIVE_FORMAT_WARC;
155cdf63a70SMartin Matuska 	a->archive.archive_format_name = "WARC/1.0";
156cdf63a70SMartin Matuska 	return (ARCHIVE_OK);
157cdf63a70SMartin Matuska }
158cdf63a70SMartin Matuska 
159cdf63a70SMartin Matuska 
160cdf63a70SMartin Matuska /* archive methods */
161cdf63a70SMartin Matuska static int
_warc_options(struct archive_write * a,const char * key,const char * val)162cdf63a70SMartin Matuska _warc_options(struct archive_write *a, const char *key, const char *val)
163cdf63a70SMartin Matuska {
164cdf63a70SMartin Matuska 	struct warc_s *w = a->format_data;
165cdf63a70SMartin Matuska 
166cdf63a70SMartin Matuska 	if (strcmp(key, "omit-warcinfo") == 0) {
167cdf63a70SMartin Matuska 		if (val == NULL || strcmp(val, "true") == 0) {
168cdf63a70SMartin Matuska 			/* great */
169cdf63a70SMartin Matuska 			w->omit_warcinfo = 1U;
170cdf63a70SMartin Matuska 			return (ARCHIVE_OK);
171cdf63a70SMartin Matuska 		}
172cdf63a70SMartin Matuska 	}
173cdf63a70SMartin Matuska 
174cdf63a70SMartin Matuska 	/* Note: The "warn" return is just to inform the options
175cdf63a70SMartin Matuska 	 * supervisor that we didn't handle it.  It will generate
176cdf63a70SMartin Matuska 	 * a suitable error if no one used this option. */
177cdf63a70SMartin Matuska 	return (ARCHIVE_WARN);
178cdf63a70SMartin Matuska }
179cdf63a70SMartin Matuska 
180cdf63a70SMartin Matuska static int
_warc_header(struct archive_write * a,struct archive_entry * entry)181cdf63a70SMartin Matuska _warc_header(struct archive_write *a, struct archive_entry *entry)
182cdf63a70SMartin Matuska {
183cdf63a70SMartin Matuska 	struct warc_s *w = a->format_data;
184cdf63a70SMartin Matuska 	struct archive_string hdr;
185cdf63a70SMartin Matuska #define MAX_HDR_SIZE 512
186cdf63a70SMartin Matuska 
187cdf63a70SMartin Matuska 	/* check whether warcinfo record needs outputting */
188cdf63a70SMartin Matuska 	if (!w->omit_warcinfo) {
189cdf63a70SMartin Matuska 		ssize_t r;
190cdf63a70SMartin Matuska 		warc_essential_hdr_t wi = {
191cdf63a70SMartin Matuska 			WT_INFO,
192cdf63a70SMartin Matuska 			/*uri*/NULL,
193cdf63a70SMartin Matuska 			/*urn*/NULL,
194cdf63a70SMartin Matuska 			/*rtm*/0,
195cdf63a70SMartin Matuska 			/*mtm*/0,
196cdf63a70SMartin Matuska 			/*cty*/"application/warc-fields",
197cdf63a70SMartin Matuska 			/*len*/sizeof(warcinfo) - 1U,
198cdf63a70SMartin Matuska 		};
199cdf63a70SMartin Matuska 		wi.rtime = w->now;
200cdf63a70SMartin Matuska 		wi.mtime = w->now;
201cdf63a70SMartin Matuska 
202cdf63a70SMartin Matuska 		archive_string_init(&hdr);
203cdf63a70SMartin Matuska 		r = _popul_ehdr(&hdr, MAX_HDR_SIZE, wi);
204cdf63a70SMartin Matuska 		if (r >= 0) {
205cdf63a70SMartin Matuska 			/* jackpot! */
206cdf63a70SMartin Matuska 			/* now also use HDR buffer for the actual warcinfo */
207cdf63a70SMartin Matuska 			archive_strncat(&hdr, warcinfo, sizeof(warcinfo) -1);
208cdf63a70SMartin Matuska 
209cdf63a70SMartin Matuska 			/* append end-of-record indicator */
210cdf63a70SMartin Matuska 			archive_strncat(&hdr, "\r\n\r\n", 4);
211cdf63a70SMartin Matuska 
212cdf63a70SMartin Matuska 			/* write to output stream */
213cdf63a70SMartin Matuska 			__archive_write_output(a, hdr.s, archive_strlen(&hdr));
214cdf63a70SMartin Matuska 		}
215cdf63a70SMartin Matuska 		/* indicate we're done with file header writing */
216cdf63a70SMartin Matuska 		w->omit_warcinfo = 1U;
217cdf63a70SMartin Matuska 		archive_string_free(&hdr);
218cdf63a70SMartin Matuska 	}
219cdf63a70SMartin Matuska 
220cdf63a70SMartin Matuska 	if (archive_entry_pathname(entry) == NULL) {
221cdf63a70SMartin Matuska 		archive_set_error(&a->archive, EINVAL,
222cdf63a70SMartin Matuska 		    "Invalid filename");
223cdf63a70SMartin Matuska 		return (ARCHIVE_WARN);
224cdf63a70SMartin Matuska 	}
225cdf63a70SMartin Matuska 
226cdf63a70SMartin Matuska 	w->typ = archive_entry_filetype(entry);
227cdf63a70SMartin Matuska 	w->populz = 0U;
228cdf63a70SMartin Matuska 	if (w->typ == AE_IFREG) {
229cdf63a70SMartin Matuska 		warc_essential_hdr_t rh = {
230cdf63a70SMartin Matuska 			WT_RSRC,
231cdf63a70SMartin Matuska 			/*uri*/NULL,
232cdf63a70SMartin Matuska 			/*urn*/NULL,
233cdf63a70SMartin Matuska 			/*rtm*/0,
234cdf63a70SMartin Matuska 			/*mtm*/0,
235cdf63a70SMartin Matuska 			/*cty*/NULL,
236cdf63a70SMartin Matuska 			/*len*/0,
237cdf63a70SMartin Matuska 		};
238cdf63a70SMartin Matuska 		ssize_t r;
239cdf63a70SMartin Matuska 		rh.tgturi = archive_entry_pathname(entry);
240cdf63a70SMartin Matuska 		rh.rtime = w->now;
241cdf63a70SMartin Matuska 		rh.mtime = archive_entry_mtime(entry);
242cdf63a70SMartin Matuska 		rh.cntlen = (size_t)archive_entry_size(entry);
243cdf63a70SMartin Matuska 
244cdf63a70SMartin Matuska 		archive_string_init(&hdr);
245cdf63a70SMartin Matuska 		r = _popul_ehdr(&hdr, MAX_HDR_SIZE, rh);
246cdf63a70SMartin Matuska 		if (r < 0) {
247cdf63a70SMartin Matuska 			/* don't bother */
248cdf63a70SMartin Matuska 			archive_set_error(
249cdf63a70SMartin Matuska 				&a->archive,
250cdf63a70SMartin Matuska 				ARCHIVE_ERRNO_FILE_FORMAT,
251cdf63a70SMartin Matuska 				"cannot archive file");
252cdf63a70SMartin Matuska 			return (ARCHIVE_WARN);
253cdf63a70SMartin Matuska 		}
254cdf63a70SMartin Matuska 		/* otherwise append to output stream */
255cdf63a70SMartin Matuska 		__archive_write_output(a, hdr.s, r);
256cdf63a70SMartin Matuska 		/* and let subsequent calls to _data() know about the size */
257cdf63a70SMartin Matuska 		w->populz = rh.cntlen;
258cdf63a70SMartin Matuska 		archive_string_free(&hdr);
259cdf63a70SMartin Matuska 		return (ARCHIVE_OK);
260cdf63a70SMartin Matuska 	}
261cdf63a70SMartin Matuska 	/* just resort to erroring as per Tim's advice */
262f9762417SMartin Matuska 	__archive_write_entry_filetype_unsupported(
263f9762417SMartin Matuska 	    &a->archive, entry, "WARC");
264cdf63a70SMartin Matuska 	return (ARCHIVE_FAILED);
265cdf63a70SMartin Matuska }
266cdf63a70SMartin Matuska 
267cdf63a70SMartin Matuska static ssize_t
_warc_data(struct archive_write * a,const void * buf,size_t len)268cdf63a70SMartin Matuska _warc_data(struct archive_write *a, const void *buf, size_t len)
269cdf63a70SMartin Matuska {
270cdf63a70SMartin Matuska 	struct warc_s *w = a->format_data;
271cdf63a70SMartin Matuska 
272cdf63a70SMartin Matuska 	if (w->typ == AE_IFREG) {
273cdf63a70SMartin Matuska 		int rc;
274cdf63a70SMartin Matuska 
275cdf63a70SMartin Matuska 		/* never write more bytes than announced */
276cdf63a70SMartin Matuska 		if (len > w->populz) {
277cdf63a70SMartin Matuska 			len = (size_t)w->populz;
278cdf63a70SMartin Matuska 		}
279cdf63a70SMartin Matuska 
280cdf63a70SMartin Matuska 		/* now then, out we put the whole shebang */
281cdf63a70SMartin Matuska 		rc = __archive_write_output(a, buf, len);
282cdf63a70SMartin Matuska 		if (rc != ARCHIVE_OK) {
283cdf63a70SMartin Matuska 			return rc;
284cdf63a70SMartin Matuska 		}
285cdf63a70SMartin Matuska 	}
286cdf63a70SMartin Matuska 	return len;
287cdf63a70SMartin Matuska }
288cdf63a70SMartin Matuska 
289cdf63a70SMartin Matuska static int
_warc_finish_entry(struct archive_write * a)290cdf63a70SMartin Matuska _warc_finish_entry(struct archive_write *a)
291cdf63a70SMartin Matuska {
292cdf63a70SMartin Matuska 	static const char _eor[] = "\r\n\r\n";
293cdf63a70SMartin Matuska 	struct warc_s *w = a->format_data;
294cdf63a70SMartin Matuska 
295cdf63a70SMartin Matuska 	if (w->typ == AE_IFREG) {
296cdf63a70SMartin Matuska 		int rc = __archive_write_output(a, _eor, sizeof(_eor) - 1U);
297cdf63a70SMartin Matuska 
298cdf63a70SMartin Matuska 		if (rc != ARCHIVE_OK) {
299cdf63a70SMartin Matuska 			return rc;
300cdf63a70SMartin Matuska 		}
301cdf63a70SMartin Matuska 	}
302cdf63a70SMartin Matuska 	/* reset type info */
303cdf63a70SMartin Matuska 	w->typ = 0;
304cdf63a70SMartin Matuska 	return (ARCHIVE_OK);
305cdf63a70SMartin Matuska }
306cdf63a70SMartin Matuska 
307cdf63a70SMartin Matuska static int
_warc_close(struct archive_write * a)308cdf63a70SMartin Matuska _warc_close(struct archive_write *a)
309cdf63a70SMartin Matuska {
310cdf63a70SMartin Matuska 	(void)a; /* UNUSED */
311cdf63a70SMartin Matuska 	return (ARCHIVE_OK);
312cdf63a70SMartin Matuska }
313cdf63a70SMartin Matuska 
314cdf63a70SMartin Matuska static int
_warc_free(struct archive_write * a)315cdf63a70SMartin Matuska _warc_free(struct archive_write *a)
316cdf63a70SMartin Matuska {
317cdf63a70SMartin Matuska 	struct warc_s *w = a->format_data;
318cdf63a70SMartin Matuska 
319cdf63a70SMartin Matuska 	free(w);
320cdf63a70SMartin Matuska 	a->format_data = NULL;
321cdf63a70SMartin Matuska 	return (ARCHIVE_OK);
322cdf63a70SMartin Matuska }
323cdf63a70SMartin Matuska 
324cdf63a70SMartin Matuska 
/* private routines */

/*
 * Like strftime(3), but takes a time_t, breaks it down as UTC and
 * appends the formatted text to the archive string AS.
 * Nothing is appended if the time cannot be broken down.
 */
static void
xstrftime(struct archive_string *as, const char *fmt, time_t t)
{
	char text[100];
	size_t textlen;
	struct tm *utc;
#if defined(HAVE_GMTIME_R) || defined(HAVE_GMTIME_S)
	struct tm storage;
#endif

#if defined(HAVE_GMTIME_S)
	/* gmtime_s reports success by returning zero */
	utc = (gmtime_s(&storage, &t) == 0) ? &storage : NULL;
#elif defined(HAVE_GMTIME_R)
	utc = gmtime_r(&t, &storage);
#else
	utc = gmtime(&t);
#endif
	if (utc == NULL) {
		return;
	}

	/* strftime() does the actual formatting for us */
	textlen = strftime(text, sizeof(text) - 1, fmt, utc);
	archive_strncat(as, text, textlen);
}
350cdf63a70SMartin Matuska 
351cdf63a70SMartin Matuska static ssize_t
_popul_ehdr(struct archive_string * tgt,size_t tsz,warc_essential_hdr_t hdr)352cdf63a70SMartin Matuska _popul_ehdr(struct archive_string *tgt, size_t tsz, warc_essential_hdr_t hdr)
353cdf63a70SMartin Matuska {
354cdf63a70SMartin Matuska 	static const char _ver[] = "WARC/1.0\r\n";
355a7f7e457SMartin Matuska 	static const char * const _typ[LAST_WT] = {
356cdf63a70SMartin Matuska 		NULL, "warcinfo", "metadata", "resource", NULL
357cdf63a70SMartin Matuska 	};
358cdf63a70SMartin Matuska 	char std_uuid[48U];
359cdf63a70SMartin Matuska 
360cdf63a70SMartin Matuska 	if (hdr.type == WT_NONE || hdr.type > WT_RSRC) {
361cdf63a70SMartin Matuska 		/* brilliant, how exactly did we get here? */
362cdf63a70SMartin Matuska 		return -1;
363cdf63a70SMartin Matuska 	}
364cdf63a70SMartin Matuska 
365cdf63a70SMartin Matuska 	archive_strcpy(tgt, _ver);
366cdf63a70SMartin Matuska 
367cdf63a70SMartin Matuska 	archive_string_sprintf(tgt, "WARC-Type: %s\r\n", _typ[hdr.type]);
368cdf63a70SMartin Matuska 
369cdf63a70SMartin Matuska 	if (hdr.tgturi != NULL) {
370cdf63a70SMartin Matuska 		/* check if there's a xyz:// */
371cdf63a70SMartin Matuska 		static const char _uri[] = "";
372cdf63a70SMartin Matuska 		static const char _fil[] = "file://";
373cdf63a70SMartin Matuska 		const char *u;
374cdf63a70SMartin Matuska 		char *chk = strchr(hdr.tgturi, ':');
375cdf63a70SMartin Matuska 
376cdf63a70SMartin Matuska 		if (chk != NULL && chk[1U] == '/' && chk[2U] == '/') {
377cdf63a70SMartin Matuska 			/* yep, it's definitely a URI */
378cdf63a70SMartin Matuska 			u = _uri;
379cdf63a70SMartin Matuska 		} else {
380cdf63a70SMartin Matuska 			/* hm, best to prepend file:// then */
381cdf63a70SMartin Matuska 			u = _fil;
382cdf63a70SMartin Matuska 		}
383cdf63a70SMartin Matuska 		archive_string_sprintf(tgt,
384cdf63a70SMartin Matuska 			"WARC-Target-URI: %s%s\r\n", u, hdr.tgturi);
385cdf63a70SMartin Matuska 	}
386cdf63a70SMartin Matuska 
387cdf63a70SMartin Matuska 	/* record time is usually when the http is sent off,
388cdf63a70SMartin Matuska 	 * just treat the archive writing as such for a moment */
389cdf63a70SMartin Matuska 	xstrftime(tgt, "WARC-Date: %Y-%m-%dT%H:%M:%SZ\r\n", hdr.rtime);
390cdf63a70SMartin Matuska 
391cdf63a70SMartin Matuska 	/* while we're at it, record the mtime */
392cdf63a70SMartin Matuska 	xstrftime(tgt, "Last-Modified: %Y-%m-%dT%H:%M:%SZ\r\n", hdr.mtime);
393cdf63a70SMartin Matuska 
394cdf63a70SMartin Matuska 	if (hdr.recid == NULL) {
395cdf63a70SMartin Matuska 		/* generate one, grrrr */
396cdf63a70SMartin Matuska 		warc_uuid_t u;
397cdf63a70SMartin Matuska 
398cdf63a70SMartin Matuska 		_gen_uuid(&u);
399cdf63a70SMartin Matuska 		/* Unfortunately, archive_string_sprintf does not
400cdf63a70SMartin Matuska 		 * handle the minimum number following '%'.
401cdf63a70SMartin Matuska 		 * So we have to use snprintf function here instead
402cdf63a70SMartin Matuska 		 * of archive_string_snprintf function. */
403cdf63a70SMartin Matuska #if defined(_WIN32) && !defined(__CYGWIN__) && !( defined(_MSC_VER) && _MSC_VER >= 1900)
404cdf63a70SMartin Matuska #define snprintf _snprintf
405cdf63a70SMartin Matuska #endif
406cdf63a70SMartin Matuska 		snprintf(
407cdf63a70SMartin Matuska 			std_uuid, sizeof(std_uuid),
408cdf63a70SMartin Matuska 			"<urn:uuid:%08x-%04x-%04x-%04x-%04x%08x>",
409cdf63a70SMartin Matuska 			u.u[0U],
410cdf63a70SMartin Matuska 			u.u[1U] >> 16U, u.u[1U] & 0xffffU,
411cdf63a70SMartin Matuska 			u.u[2U] >> 16U, u.u[2U] & 0xffffU,
412cdf63a70SMartin Matuska 			u.u[3U]);
413cdf63a70SMartin Matuska 		hdr.recid = std_uuid;
414cdf63a70SMartin Matuska 	}
415cdf63a70SMartin Matuska 
416cdf63a70SMartin Matuska 	/* record-id is mandatory, fingers crossed we won't fail */
417cdf63a70SMartin Matuska 	archive_string_sprintf(tgt, "WARC-Record-ID: %s\r\n", hdr.recid);
418cdf63a70SMartin Matuska 
419cdf63a70SMartin Matuska 	if (hdr.cnttyp != NULL) {
420cdf63a70SMartin Matuska 		archive_string_sprintf(tgt, "Content-Type: %s\r\n", hdr.cnttyp);
421cdf63a70SMartin Matuska 	}
422cdf63a70SMartin Matuska 
423cdf63a70SMartin Matuska 	/* next one is mandatory */
424cdf63a70SMartin Matuska 	archive_string_sprintf(tgt, "Content-Length: %ju\r\n", (uintmax_t)hdr.cntlen);
425cdf63a70SMartin Matuska 	/**/
426cdf63a70SMartin Matuska 	archive_strncat(tgt, "\r\n", 2);
427cdf63a70SMartin Matuska 
428cdf63a70SMartin Matuska 	return (archive_strlen(tgt) >= tsz)? -1: (ssize_t)archive_strlen(tgt);
429cdf63a70SMartin Matuska }
430cdf63a70SMartin Matuska 
431cdf63a70SMartin Matuska static int
_gen_uuid(warc_uuid_t * tgt)432cdf63a70SMartin Matuska _gen_uuid(warc_uuid_t *tgt)
433cdf63a70SMartin Matuska {
434cdf63a70SMartin Matuska 	archive_random(tgt->u, sizeof(tgt->u));
435cdf63a70SMartin Matuska 	/* obey uuid version 4 rules */
436cdf63a70SMartin Matuska 	tgt->u[1U] &= 0xffff0fffU;
437cdf63a70SMartin Matuska 	tgt->u[1U] |= 0x4000U;
438cdf63a70SMartin Matuska 	tgt->u[2U] &= 0x3fffffffU;
439cdf63a70SMartin Matuska 	tgt->u[2U] |= 0x80000000U;
440cdf63a70SMartin Matuska 	return 0;
441cdf63a70SMartin Matuska }
442cdf63a70SMartin Matuska 
443cdf63a70SMartin Matuska /* archive_write_set_format_warc.c ends here */
444