/* xref: /minix/common/dist/zlib/examples/gzlog.c (revision 44bedb31) */
1 /*	$NetBSD: gzlog.c,v 1.1.1.1 2006/01/14 20:11:09 christos Exp $	*/
2 
3 /*
4  * gzlog.c
5  * Copyright (C) 2004 Mark Adler
6  * For conditions of distribution and use, see copyright notice in gzlog.h
7  * version 1.0, 26 Nov 2004
8  *
9  */
10 
#include <errno.h>              /* errno, EWOULDBLOCK, EAGAIN, EINTR */
#include <string.h>             /* memcmp() */
#include <stdlib.h>             /* malloc(), free(), NULL */
#include <sys/types.h>          /* size_t, ssize_t, off_t */
#include <unistd.h>             /* read(), write(), close(), sleep(), */
                                /* ftruncate(), lseek() */
#include <fcntl.h>              /* open() */
#include <sys/file.h>           /* flock() */
#include "zlib.h"               /* deflateInit2(), deflate(), deflateEnd(), */
                                /* crc32() */
19 
20 #include "gzlog.h"              /* interface */
21 #define local static
22 
/*
 * State kept for one open log.  gzlog_open() allocates one of these and
 * returns it to the caller as an opaque void pointer; gzlog_write() and
 * gzlog_close() cast that pointer back and check the id field.
 */
typedef struct gz_log_s {
    int id;                 /* set to GZLOGID by gzlog_open() */
    int fd;                 /* descriptor of the open log file */
    off_t extra;            /* file offset of the "ap" subfield contents */
    off_t mark_off;         /* file offset of marked data */
    off_t last_off;         /* file offset of the last block */
    unsigned long crc;      /* crc of the uncompressed data */
    unsigned long len;      /* uncompressed length (low 32 bits are stored) */
    unsigned stored;        /* bytes in the stored block being written */
} gz_log;

/* value placed in gz_log.id and checked on every call taking a handle */
#define GZLOGID 19334
36 
#define LOCK_RETRY 1            /* seconds to sleep between lock attempts */
#define LOCK_PATIENCE 1200      /* total seconds (about twenty minutes) to */
                                /* keep trying before giving up */

/*
 * Acquire an exclusive flock() lock on fd.
 *
 * The lock is requested without blocking.  While it is held elsewhere
 * (EWOULDBLOCK/EAGAIN) or the call was interrupted (EINTR), the attempt is
 * repeated every LOCK_RETRY seconds until LOCK_PATIENCE seconds have been
 * spent.  Any other flock() error cannot be cured by waiting, so it is
 * reported at once instead of sleeping through the whole patience period.
 *
 * Returns 0 once the lock is held, -1 otherwise.
 */
static int lock(int fd)
{
    int patience;

    patience = LOCK_PATIENCE;
    do {
        if (flock(fd, LOCK_EX | LOCK_NB) == 0)
            return 0;

        /* only contention and interruption are worth retrying */
        if (errno != EWOULDBLOCK && errno != EAGAIN && errno != EINTR)
            return -1;

        (void)sleep(LOCK_RETRY);
        patience -= LOCK_RETRY;
    } while (patience > 0);

    /* we've run out of patience -- give up */
    return -1;
}
57 
/* Drop the flock() lock on fd.  The result of the call is ignored. */
static void unlock(int fd)
{
    int rc;

    rc = flock(fd, LOCK_UN);
    (void)rc;
}
63 
64 /* release a log object */
log_clean(gz_log * log)65 local void log_clean(gz_log *log)
66 {
67     unlock(log->fd);
68     (void)close(log->fd);
69     free(log);
70 }
71 
/*
 * Assemble an unsigned long from the four bytes at buf, least significant
 * byte first.  buf is only read, so it is const-qualified.
 */
static unsigned long make_ulg(const unsigned char *buf)
{
    return (unsigned long)buf[0] |
           ((unsigned long)buf[1] << 8) |
           ((unsigned long)buf[2] << 16) |
           ((unsigned long)buf[3] << 24);
}
83 
/*
 * Assemble an off_t from the eight bytes at buf, least significant byte
 * first.
 *
 * The value is built in an unsigned 64-bit accumulator and converted to
 * off_t once at the end.  Shifting an off_t directly would be undefined
 * when a set bit reaches the sign position (top byte >= 0x80) or when
 * off_t is narrower than the shift count.
 */
static off_t make_off(const unsigned char *buf)
{
    unsigned long long val = 0;
    int n;

    /* start from the most significant byte so each step is a plain shift */
    for (n = 7; n >= 0; n--)
        val = (val << 8) | buf[n];
    return (off_t)val;
}
95 
/*
 * Store the low 32 bits of val into the four bytes at buf, least
 * significant byte first.
 */
static void dice_ulg(unsigned long val, unsigned char *buf)
{
    unsigned char *end = buf + 4;

    while (buf < end) {
        *buf++ = (unsigned char)(val & 0xff);
        val >>= 8;
    }
}
106 
/*
 * Store val into the eight bytes at buf, least significant byte first.
 *
 * The shifting is done on an unsigned copy: right-shifting a negative
 * signed value is implementation-defined, while the conversion to an
 * unsigned type and unsigned shifts are fully specified.
 */
static void dice_off(off_t val, unsigned char *buf)
{
    unsigned long long bits = (unsigned long long)val;
    int n;

    for (n = 0; n < 8; n++) {
        buf[n] = (unsigned char)(bits & 0xff);
        bits >>= 8;
    }
}
117 
/*
 * Image of a new, empty log: a gzip header whose extra field holds the
 * "ap" subfield, followed by one empty final stored block and the gzip
 * trailer.  Both offsets in the subfield are 32, the position of that
 * stored block in this image.
 *
 * The bytes are unsigned because several exceed the range of a signed
 * char (0x8b, 0xff), and const because the image is only ever written out.
 */
static const unsigned char empty_gz[] = {
    0x1f, 0x8b,                 /* magic gzip id */
    8,                          /* compression method is deflate */
    4,                          /* there is an extra field */
    0, 0, 0, 0,                 /* no modification time provided */
    0, 0xff,                    /* no extra flags, no OS */
    20, 0, 'a', 'p', 16, 0,     /* extra field with "ap" subfield */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of uncompressed data */
    32, 0, 0, 0, 0, 0, 0, 0,    /* offset of last block */
    1, 0, 0, 0xff, 0xff,        /* empty stored block (last) */
    0, 0, 0, 0,                 /* crc */
    0, 0, 0, 0                  /* uncompressed length */
};
132 
133 /* initialize a log object with locking */
gzlog_open(char * path)134 void *gzlog_open(char *path)
135 {
136     unsigned xlen;
137     unsigned char temp[20];
138     unsigned sub_len;
139     int good;
140     gz_log *log;
141 
142     /* allocate log structure */
143     log = malloc(sizeof(gz_log));
144     if (log == NULL)
145         return NULL;
146     log->id = GZLOGID;
147 
148     /* open file, creating it if necessary, and locking it */
149     log->fd = open(path, O_RDWR | O_CREAT, 0600);
150     if (log->fd < 0) {
151         free(log);
152         return NULL;
153     }
154     if (lock(log->fd)) {
155         close(log->fd);
156         free(log);
157         return NULL;
158     }
159 
160     /* if file is empty, write new gzip stream */
161     if (lseek(log->fd, 0, SEEK_END) == 0) {
162         if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) {
163             log_clean(log);
164             return NULL;
165         }
166     }
167 
168     /* check gzip header */
169     (void)lseek(log->fd, 0, SEEK_SET);
170     if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f ||
171         temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) {
172         log_clean(log);
173         return NULL;
174     }
175 
176     /* process extra field to find "ap" sub-field */
177     xlen = temp[10] + (temp[11] << 8);
178     good = 0;
179     while (xlen) {
180         if (xlen < 4 || read(log->fd, temp, 4) != 4)
181             break;
182         sub_len = temp[2];
183         sub_len += temp[3] << 8;
184         xlen -= 4;
185         if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) {
186             good = 1;
187             break;
188         }
189         if (xlen < sub_len)
190             break;
191         (void)lseek(log->fd, sub_len, SEEK_CUR);
192         xlen -= sub_len;
193     }
194     if (!good) {
195         log_clean(log);
196         return NULL;
197     }
198 
199     /* read in "ap" sub-field */
200     log->extra = lseek(log->fd, 0, SEEK_CUR);
201     if (read(log->fd, temp, 16) != 16) {
202         log_clean(log);
203         return NULL;
204     }
205     log->mark_off = make_off(temp);
206     log->last_off = make_off(temp + 8);
207 
208     /* get crc, length of gzip file */
209     (void)lseek(log->fd, log->last_off, SEEK_SET);
210     if (read(log->fd, temp, 13) != 13 ||
211         memcmp(temp, "\001\000\000\377\377", 5) != 0) {
212         log_clean(log);
213         return NULL;
214     }
215     log->crc = make_ulg(temp + 5);
216     log->len = make_ulg(temp + 9);
217 
218     /* set up to write over empty last block */
219     (void)lseek(log->fd, log->last_off + 5, SEEK_SET);
220     log->stored = 0;
221     return (void *)log;
222 }
223 
224 /* maximum amount to put in a stored block before starting a new one */
225 #define MAX_BLOCK 16384
226 
227 /* write a block to a log object */
gzlog_write(void * obj,char * data,size_t len)228 int gzlog_write(void *obj, char *data, size_t len)
229 {
230     size_t some;
231     unsigned char temp[5];
232     gz_log *log;
233 
234     /* check object */
235     log = (gz_log *)obj;
236     if (log == NULL || log->id != GZLOGID)
237         return 1;
238 
239     /* write stored blocks until all of the input is written */
240     do {
241         some = MAX_BLOCK - log->stored;
242         if (some > len)
243             some = len;
244         if (write(log->fd, data, some) != some)
245             return 1;
246         log->crc = crc32(log->crc, data, some);
247         log->len += some;
248         len -= some;
249         data += some;
250         log->stored += some;
251 
252         /* if the stored block is full, end it and start another */
253         if (log->stored == MAX_BLOCK) {
254             (void)lseek(log->fd, log->last_off, SEEK_SET);
255             temp[0] = 0;
256             dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
257                      temp + 1);
258             if (write(log->fd, temp, 5) != 5)
259                 return 1;
260             log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
261             (void)lseek(log->fd, 5, SEEK_CUR);
262             log->stored = 0;
263         }
264     } while (len);
265     return 0;
266 }
267 
268 /* recompress the remaining stored deflate data in place */
recomp(gz_log * log)269 local int recomp(gz_log *log)
270 {
271     z_stream strm;
272     size_t len, max;
273     unsigned char *in;
274     unsigned char *out;
275     unsigned char temp[16];
276 
277     /* allocate space and read it all in (it's around 1 MB) */
278     len = log->last_off - log->mark_off;
279     max = len + (len >> 12) + (len >> 14) + 11;
280     out = malloc(max);
281     if (out == NULL)
282         return 1;
283     in = malloc(len);
284     if (in == NULL) {
285         free(out);
286         return 1;
287     }
288     (void)lseek(log->fd, log->mark_off, SEEK_SET);
289     if (read(log->fd, in, len) != len) {
290         free(in);
291         free(out);
292         return 1;
293     }
294 
295     /* recompress in memory, decoding stored data as we go */
296     /* note: this assumes that unsigned is four bytes or more */
297     /*       consider not making that assumption */
298     strm.zalloc = Z_NULL;
299     strm.zfree = Z_NULL;
300     strm.opaque = Z_NULL;
301     if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8,
302         Z_DEFAULT_STRATEGY) != Z_OK) {
303         free(in);
304         free(out);
305         return 1;
306     }
307     strm.next_in = in;
308     strm.avail_out = max;
309     strm.next_out = out;
310     while (len >= 5) {
311         if (strm.next_in[0] != 0)
312             break;
313         strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8);
314         strm.next_in += 5;
315         len -= 5;
316         if (strm.avail_in != 0) {
317             if (len < strm.avail_in)
318                 break;
319             len -= strm.avail_in;
320             (void)deflate(&strm, Z_NO_FLUSH);
321             if (strm.avail_in != 0 || strm.avail_out == 0)
322                 break;
323         }
324     }
325     (void)deflate(&strm, Z_SYNC_FLUSH);
326     (void)deflateEnd(&strm);
327     free(in);
328     if (len != 0 || strm.avail_out == 0) {
329         free(out);
330         return 1;
331     }
332 
333     /* overwrite stored data with compressed data */
334     (void)lseek(log->fd, log->mark_off, SEEK_SET);
335     len = max - strm.avail_out;
336     if (write(log->fd, out, len) != len) {
337         free(out);
338         return 1;
339     }
340     free(out);
341 
342     /* write last empty block, crc, and length */
343     log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR);
344     temp[0] = 1;
345     dice_ulg(0xffffL << 16, temp + 1);
346     dice_ulg(log->crc, temp + 5);
347     dice_ulg(log->len, temp + 9);
348     if (write(log->fd, temp, 13) != 13)
349         return 1;
350 
351     /* truncate file to discard remaining stored data and old trailer */
352     ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR));
353 
354     /* update extra field to point to new last empty block */
355     (void)lseek(log->fd, log->extra, SEEK_SET);
356     dice_off(log->mark_off, temp);
357     dice_off(log->last_off, temp + 8);
358     if (write(log->fd, temp, 16) != 16)
359         return 1;
360     return 0;
361 }
362 
363 /* maximum accumulation of stored blocks before compressing */
364 #define MAX_STORED 1048576
365 
366 /* close log object */
gzlog_close(void * obj)367 int gzlog_close(void *obj)
368 {
369     unsigned char temp[8];
370     gz_log *log;
371 
372     /* check object */
373     log = (gz_log *)obj;
374     if (log == NULL || log->id != GZLOGID)
375         return 1;
376 
377     /* go to start of most recent block being written */
378     (void)lseek(log->fd, log->last_off, SEEK_SET);
379 
380     /* if some stuff was put there, update block */
381     if (log->stored) {
382         temp[0] = 0;
383         dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16),
384                  temp + 1);
385         if (write(log->fd, temp, 5) != 5)
386             return 1;
387         log->last_off = lseek(log->fd, log->stored, SEEK_CUR);
388     }
389 
390     /* write last block (empty) */
391     if (write(log->fd, "\001\000\000\377\377", 5) != 5)
392         return 1;
393 
394     /* write updated crc and uncompressed length */
395     dice_ulg(log->crc, temp);
396     dice_ulg(log->len, temp + 4);
397     if (write(log->fd, temp, 8) != 8)
398         return 1;
399 
400     /* put offset of that last block in gzip extra block */
401     (void)lseek(log->fd, log->extra + 8, SEEK_SET);
402     dice_off(log->last_off, temp);
403     if (write(log->fd, temp, 8) != 8)
404         return 1;
405 
406     /* if more than 1 MB stored, then time to compress it */
407     if (log->last_off - log->mark_off > MAX_STORED) {
408         if (recomp(log))
409             return 1;
410     }
411 
412     /* unlock and close file */
413     log_clean(log);
414     return 0;
415 }
416