1 /* $NetBSD: gzlog.c,v 1.1.1.1 2006/01/14 20:11:09 christos Exp $ */ 2 3 /* 4 * gzlog.c 5 * Copyright (C) 2004 Mark Adler 6 * For conditions of distribution and use, see copyright notice in gzlog.h 7 * version 1.0, 26 Nov 2004 8 * 9 */ 10 11 #include <string.h> /* memcmp() */ 12 #include <stdlib.h> /* malloc(), free(), NULL */ 13 #include <sys/types.h> /* size_t, off_t */ 14 #include <unistd.h> /* read(), close(), sleep(), ftruncate(), */ 15 /* lseek() */ 16 #include <fcntl.h> /* open() */ 17 #include <sys/file.h> /* flock() */ 18 #include "zlib.h" /* deflateInit2(), deflate(), deflateEnd() */ 19 20 #include "gzlog.h" /* interface */ 21 #define local static 22 23 /* log object structure */ 24 typedef struct { 25 int id; /* object identifier */ 26 int fd; /* log file descriptor */ 27 off_t extra; /* offset of extra "ap" subfield */ 28 off_t mark_off; /* offset of marked data */ 29 off_t last_off; /* offset of last block */ 30 unsigned long crc; /* uncompressed crc */ 31 unsigned long len; /* uncompressed length (modulo 2^32) */ 32 unsigned stored; /* length of current stored block */ 33 } gz_log; 34 35 #define GZLOGID 19334 /* gz_log object identifier */ 36 37 #define LOCK_RETRY 1 /* retry lock once a second */ 38 #define LOCK_PATIENCE 1200 /* try about twenty minutes before forcing */ 39 40 /* acquire a lock on a file */ 41 local int lock(int fd) 42 { 43 int patience; 44 45 /* try to lock every LOCK_RETRY seconds for LOCK_PATIENCE seconds */ 46 patience = LOCK_PATIENCE; 47 do { 48 if (flock(fd, LOCK_EX + LOCK_NB) == 0) 49 return 0; 50 (void)sleep(LOCK_RETRY); 51 patience -= LOCK_RETRY; 52 } while (patience > 0); 53 54 /* we've run out of patience -- give up */ 55 return -1; 56 } 57 58 /* release lock */ 59 local void unlock(int fd) 60 { 61 (void)flock(fd, LOCK_UN); 62 } 63 64 /* release a log object */ 65 local void log_clean(gz_log *log) 66 { 67 unlock(log->fd); 68 (void)close(log->fd); 69 free(log); 70 } 71 72 /* read an unsigned long from a byte buffer little-endian */ 73 local unsigned long make_ulg(unsigned char *buf) 74 { 75 int n; 76 unsigned long val; 77 78 val = (unsigned long)(*buf++); 79 for (n = 8; n < 32; n += 8) 80 val += (unsigned long)(*buf++) << n; 81 return val; 82 } 83 84 /* read an off_t from a byte buffer little-endian */ 85 local off_t make_off(unsigned char *buf) 86 { 87 int n; 88 off_t val; 89 90 val = (off_t)(*buf++); 91 for (n = 8; n < 64; n += 8) 92 val += (off_t)(*buf++) << n; 93 return val; 94 } 95 96 /* write an unsigned long little-endian to byte buffer */ 97 local void dice_ulg(unsigned long val, unsigned char *buf) 98 { 99 int n; 100 101 for (n = 0; n < 4; n++) { 102 *buf++ = val & 0xff; 103 val >>= 8; 104 } 105 } 106 107 /* write an off_t little-endian to byte buffer */ 108 local void dice_off(off_t val, unsigned char *buf) 109 { 110 int n; 111 112 for (n = 0; n < 8; n++) { 113 *buf++ = val & 0xff; 114 val >>= 8; 115 } 116 } 117 118 /* initial, empty gzip file for appending */ 119 local char empty_gz[] = { 120 0x1f, 0x8b, /* magic gzip id */ 121 8, /* compression method is deflate */ 122 4, /* there is an extra field */ 123 0, 0, 0, 0, /* no modification time provided */ 124 0, 0xff, /* no extra flags, no OS */ 125 20, 0, 'a', 'p', 16, 0, /* extra field with "ap" subfield */ 126 32, 0, 0, 0, 0, 0, 0, 0, /* offset of uncompressed data */ 127 32, 0, 0, 0, 0, 0, 0, 0, /* offset of last block */ 128 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ 129 0, 0, 0, 0, /* crc */ 130 0, 0, 0, 0 /* uncompressed length */ 131 }; 132 133 /* initialize a log object with locking */ 134 void *gzlog_open(char *path) 135 { 136 unsigned xlen; 137 unsigned char temp[20]; 138 unsigned sub_len; 139 int good; 140 gz_log *log; 141 142 /* allocate log structure */ 143 log = malloc(sizeof(gz_log)); 144 if (log == NULL) 145 return NULL; 146 log->id = GZLOGID; 147 148 /* open file, creating it if necessary, and locking it */ 149 log->fd = open(path, O_RDWR | O_CREAT, 0600); 150 if (log->fd < 0) { 151 free(log); 152 return NULL; 153 } 154 if (lock(log->fd)) { 155 close(log->fd); 156 free(log); 157 return NULL; 158 } 159 160 /* if file is empty, write new gzip stream */ 161 if (lseek(log->fd, 0, SEEK_END) == 0) { 162 if (write(log->fd, empty_gz, sizeof(empty_gz)) != sizeof(empty_gz)) { 163 log_clean(log); 164 return NULL; 165 } 166 } 167 168 /* check gzip header */ 169 (void)lseek(log->fd, 0, SEEK_SET); 170 if (read(log->fd, temp, 12) != 12 || temp[0] != 0x1f || 171 temp[1] != 0x8b || temp[2] != 8 || (temp[3] & 4) == 0) { 172 log_clean(log); 173 return NULL; 174 } 175 176 /* process extra field to find "ap" sub-field */ 177 xlen = temp[10] + (temp[11] << 8); 178 good = 0; 179 while (xlen) { 180 if (xlen < 4 || read(log->fd, temp, 4) != 4) 181 break; 182 sub_len = temp[2]; 183 sub_len += temp[3] << 8; 184 xlen -= 4; 185 if (memcmp(temp, "ap", 2) == 0 && sub_len == 16) { 186 good = 1; 187 break; 188 } 189 if (xlen < sub_len) 190 break; 191 (void)lseek(log->fd, sub_len, SEEK_CUR); 192 xlen -= sub_len; 193 } 194 if (!good) { 195 log_clean(log); 196 return NULL; 197 } 198 199 /* read in "ap" sub-field */ 200 log->extra = lseek(log->fd, 0, SEEK_CUR); 201 if (read(log->fd, temp, 16) != 16) { 202 log_clean(log); 203 return NULL; 204 } 205 log->mark_off = make_off(temp); 206 log->last_off = make_off(temp + 8); 207 208 /* get crc, length of gzip file */ 209 (void)lseek(log->fd, log->last_off, SEEK_SET); 210 if (read(log->fd, temp, 13) != 13 || 211 memcmp(temp, "\001\000\000\377\377", 5) != 0) { 212 log_clean(log); 213 return NULL; 214 } 215 log->crc = make_ulg(temp + 5); 216 log->len = make_ulg(temp + 9); 217 218 /* set up to write over empty last block */ 219 (void)lseek(log->fd, log->last_off + 5, SEEK_SET); 220 log->stored = 0; 221 return (void *)log; 222 } 223 224 /* maximum amount to put in a stored block before starting a new one */ 225 #define MAX_BLOCK 16384 226 227 /* write a block to a log object */ 228 int gzlog_write(void *obj, char *data, size_t len) 229 { 230 size_t some; 231 unsigned char temp[5]; 232 gz_log *log; 233 234 /* check object */ 235 log = (gz_log *)obj; 236 if (log == NULL || log->id != GZLOGID) 237 return 1; 238 239 /* write stored blocks until all of the input is written */ 240 do { 241 some = MAX_BLOCK - log->stored; 242 if (some > len) 243 some = len; 244 if (write(log->fd, data, some) != some) 245 return 1; 246 log->crc = crc32(log->crc, data, some); 247 log->len += some; 248 len -= some; 249 data += some; 250 log->stored += some; 251 252 /* if the stored block is full, end it and start another */ 253 if (log->stored == MAX_BLOCK) { 254 (void)lseek(log->fd, log->last_off, SEEK_SET); 255 temp[0] = 0; 256 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), 257 temp + 1); 258 if (write(log->fd, temp, 5) != 5) 259 return 1; 260 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); 261 (void)lseek(log->fd, 5, SEEK_CUR); 262 log->stored = 0; 263 } 264 } while (len); 265 return 0; 266 } 267 268 /* recompress the remaining stored deflate data in place */ 269 local int recomp(gz_log *log) 270 { 271 z_stream strm; 272 size_t len, max; 273 unsigned char *in; 274 unsigned char *out; 275 unsigned char temp[16]; 276 277 /* allocate space and read it all in (it's around 1 MB) */ 278 len = log->last_off - log->mark_off; 279 max = len + (len >> 12) + (len >> 14) + 11; 280 out = malloc(max); 281 if (out == NULL) 282 return 1; 283 in = malloc(len); 284 if (in == NULL) { 285 free(out); 286 return 1; 287 } 288 (void)lseek(log->fd, log->mark_off, SEEK_SET); 289 if (read(log->fd, in, len) != len) { 290 free(in); 291 free(out); 292 return 1; 293 } 294 295 /* recompress in memory, decoding stored data as we go */ 296 /* note: this assumes that unsigned is four bytes or more */ 297 /* consider not making that assumption */ 298 strm.zalloc = Z_NULL; 299 strm.zfree = Z_NULL; 300 strm.opaque = Z_NULL; 301 if (deflateInit2(&strm, Z_BEST_COMPRESSION, Z_DEFLATED, -15, 8, 302 Z_DEFAULT_STRATEGY) != Z_OK) { 303 free(in); 304 free(out); 305 return 1; 306 } 307 strm.next_in = in; 308 strm.avail_out = max; 309 strm.next_out = out; 310 while (len >= 5) { 311 if (strm.next_in[0] != 0) 312 break; 313 strm.avail_in = strm.next_in[1] + (strm.next_in[2] << 8); 314 strm.next_in += 5; 315 len -= 5; 316 if (strm.avail_in != 0) { 317 if (len < strm.avail_in) 318 break; 319 len -= strm.avail_in; 320 (void)deflate(&strm, Z_NO_FLUSH); 321 if (strm.avail_in != 0 || strm.avail_out == 0) 322 break; 323 } 324 } 325 (void)deflate(&strm, Z_SYNC_FLUSH); 326 (void)deflateEnd(&strm); 327 free(in); 328 if (len != 0 || strm.avail_out == 0) { 329 free(out); 330 return 1; 331 } 332 333 /* overwrite stored data with compressed data */ 334 (void)lseek(log->fd, log->mark_off, SEEK_SET); 335 len = max - strm.avail_out; 336 if (write(log->fd, out, len) != len) { 337 free(out); 338 return 1; 339 } 340 free(out); 341 342 /* write last empty block, crc, and length */ 343 log->mark_off = log->last_off = lseek(log->fd, 0, SEEK_CUR); 344 temp[0] = 1; 345 dice_ulg(0xffffL << 16, temp + 1); 346 dice_ulg(log->crc, temp + 5); 347 dice_ulg(log->len, temp + 9); 348 if (write(log->fd, temp, 13) != 13) 349 return 1; 350 351 /* truncate file to discard remaining stored data and old trailer */ 352 ftruncate(log->fd, lseek(log->fd, 0, SEEK_CUR)); 353 354 /* update extra field to point to new last empty block */ 355 (void)lseek(log->fd, log->extra, SEEK_SET); 356 dice_off(log->mark_off, temp); 357 dice_off(log->last_off, temp + 8); 358 if (write(log->fd, temp, 16) != 16) 359 return 1; 360 return 0; 361 } 362 363 /* maximum accumulation of stored blocks before compressing */ 364 #define MAX_STORED 1048576 365 366 /* close log object */ 367 int gzlog_close(void *obj) 368 { 369 unsigned char temp[8]; 370 gz_log *log; 371 372 /* check object */ 373 log = (gz_log *)obj; 374 if (log == NULL || log->id != GZLOGID) 375 return 1; 376 377 /* go to start of most recent block being written */ 378 (void)lseek(log->fd, log->last_off, SEEK_SET); 379 380 /* if some stuff was put there, update block */ 381 if (log->stored) { 382 temp[0] = 0; 383 dice_ulg(log->stored + ((unsigned long)(~log->stored) << 16), 384 temp + 1); 385 if (write(log->fd, temp, 5) != 5) 386 return 1; 387 log->last_off = lseek(log->fd, log->stored, SEEK_CUR); 388 } 389 390 /* write last block (empty) */ 391 if (write(log->fd, "\001\000\000\377\377", 5) != 5) 392 return 1; 393 394 /* write updated crc and uncompressed length */ 395 dice_ulg(log->crc, temp); 396 dice_ulg(log->len, temp + 4); 397 if (write(log->fd, temp, 8) != 8) 398 return 1; 399 400 /* put offset of that last block in gzip extra block */ 401 (void)lseek(log->fd, log->extra + 8, SEEK_SET); 402 dice_off(log->last_off, temp); 403 if (write(log->fd, temp, 8) != 8) 404 return 1; 405 406 /* if more than 1 MB stored, then time to compress it */ 407 if (log->last_off - log->mark_off > MAX_STORED) { 408 if (recomp(log)) 409 return 1; 410 } 411 412 /* unlock and close file */ 413 log_clean(log); 414 return 0; 415 } 416