1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * gsf-input-gzip.c: wrapper to uncompress gzipped input
4 *
5 * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
6 * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of version 2.1 of the GNU Lesser General Public
10 * License as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
20 * USA
21 */
22
23 #include <gsf-config.h>
24 #include <gsf/gsf-input-gzip.h>
25 #include <gsf/gsf.h>
26 #include <glib/gi18n-lib.h>
27
28 #include <zlib.h>
29 #include <string.h>
30
31 #define Z_BUFSIZE 0x100
32
33 static GObjectClass *parent_class;
34
35 struct _GsfInputGZip {
36 GsfInput input;
37
38 GsfInput *source; /* compressed data */
39 gboolean raw; /* No header and no trailer. */
40 GError *err;
41 gsf_off_t uncompressed_size;
42 gboolean stop_byte_added;
43
44 z_stream stream;
45 guint8 const *gzipped_data;
46 uLong crc; /* crc32 of uncompressed data */
47
48 guint8 *buf;
49 size_t buf_size;
50
51 gsf_off_t header_size, trailer_size;
52 gsf_off_t seek_skipped;
53 };
54
55 typedef struct {
56 GsfInputClass input_class;
57 } GsfInputGZipClass;
58
/* Property identifiers used by get_property/set_property; PROP_0 is an
 * unused placeholder so that the real identifiers start at 1. */
enum {
	PROP_0 = 0,
	PROP_RAW,
	PROP_SOURCE,
	PROP_UNCOMPRESSED_SIZE
};
65
/* gzip flag byte */
#define GZIP_IS_ASCII		0x01 /* file contains text ? */
#define GZIP_HEADER_CRC		0x02 /* there is a CRC in the header */
#define GZIP_EXTRA_FIELD	0x04 /* there is an 'extra' field */
#define GZIP_ORIGINAL_NAME	0x08 /* the original is stored */
#define GZIP_HAS_COMMENT	0x10 /* There is a comment in the header */
/* Mask of every flag bit understood here.  The expansion is fully
 * parenthesized so that it behaves as a single operand in any context. */
#define GZIP_HEADER_FLAGS \
	((unsigned)(GZIP_IS_ASCII | GZIP_HEADER_CRC | GZIP_EXTRA_FIELD | \
		    GZIP_ORIGINAL_NAME | GZIP_HAS_COMMENT))
73
74 static gboolean
check_header(GsfInputGZip * input)75 check_header (GsfInputGZip *input)
76 {
77 if (input->raw) {
78 input->header_size = 0;
79 input->trailer_size = 0;
80 } else {
81 static guint8 const signature[2] = {0x1f, 0x8b};
82 guint8 const *data;
83 unsigned flags, len;
84 guint32 modutime;
85
86 /* Check signature */
87 if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) ||
88 0 != memcmp (data, signature, sizeof (signature)))
89 return TRUE;
90
91 /* verify flags and compression type */
92 flags = data[3];
93 if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0)
94 return TRUE;
95
96 modutime = GSF_LE_GET_GUINT32 (data + 4);
97 if (modutime != 0) {
98 GDateTime *modtime = g_date_time_new_from_unix_utc (modutime);
99 gsf_input_set_modtime (GSF_INPUT (input), modtime);
100 g_date_time_unref (modtime);
101 }
102
103 /* If we have the size, don't bother seeking to the end. */
104 if (input->uncompressed_size < 0) {
105 /* Get the uncompressed size */
106 if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) ||
107 NULL == (data = gsf_input_read (input->source, 4, NULL)))
108 return TRUE;
109 /* FIXME, but how? The size read here is modulo 2^32. */
110 input->uncompressed_size = GSF_LE_GET_GUINT32 (data);
111
112 if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
113 g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
114 "It is probably truncated or corrupt");
115 }
116 }
117
118 if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
119 return TRUE;
120
121 if (flags & GZIP_EXTRA_FIELD) {
122 if (NULL == (data = gsf_input_read (input->source, 2, NULL)))
123 return TRUE;
124 len = GSF_LE_GET_GUINT16 (data);
125 if (NULL == gsf_input_read (input->source, len, NULL))
126 return TRUE;
127 }
128 if (flags & GZIP_ORIGINAL_NAME) {
129 /* Skip over the filename (which is in ISO 8859-1 encoding). */
130 do {
131 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
132 return TRUE;
133 } while (*data != 0);
134 }
135
136 if (flags & GZIP_HAS_COMMENT) {
137 /* Skip over the comment (which is in ISO 8859-1 encoding). */
138 do {
139 if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
140 return TRUE;
141 } while (*data != 0);
142 }
143
144 if (flags & GZIP_HEADER_CRC &&
145 NULL == (data = gsf_input_read (input->source, 2, NULL)))
146 return TRUE;
147
148 input->header_size = input->source->cur_offset;
149 /* the last 8 bytes are the crc and size. */
150 input->trailer_size = 8;
151 }
152
153 gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);
154
155 if (gsf_input_remaining (input->source) < input->trailer_size)
156 return TRUE; /* No room for payload */
157
158 return FALSE;
159 }
160
161 static gboolean
init_zip(GsfInputGZip * gzip,GError ** err)162 init_zip (GsfInputGZip *gzip, GError **err)
163 {
164 gsf_off_t cur_pos;
165
166 if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
167 if (err != NULL)
168 *err = g_error_new (gsf_input_error_id (), 0,
169 _("Unable to initialize zlib"));
170 return TRUE;
171 }
172
173 cur_pos = gsf_input_tell (gzip->source);
174 if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
175 if (err)
176 *err = g_error_new (gsf_input_error_id (), 0,
177 _("Failed to rewind source"));
178 return TRUE;
179 }
180
181 if (check_header (gzip) != FALSE) {
182 if (err != NULL)
183 *err = g_error_new (gsf_input_error_id (), 0,
184 _("Invalid gzip header"));
185 if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
186 g_warning ("attempt to restore position failed ??");
187 }
188 return TRUE;
189 }
190
191 return FALSE;
192 }
193
194 static void
gsf_input_gzip_set_source(GsfInputGZip * gzip,GsfInput * source)195 gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source)
196 {
197 if (source)
198 g_object_ref (GSF_INPUT (source));
199 if (gzip->source)
200 g_object_unref (gzip->source);
201 gzip->source = source;
202 }
203
204
205 /**
206 * gsf_input_gzip_new:
207 * @source: The underlying data source.
208 * @err: (allow-none): place to store a #GError if anything goes wrong
209 *
210 * Adds a reference to @source.
211 *
212 * Returns: a new file or %NULL.
213 **/
214 GsfInput *
gsf_input_gzip_new(GsfInput * source,GError ** err)215 gsf_input_gzip_new (GsfInput *source, GError **err)
216 {
217 GsfInputGZip *gzip;
218
219 g_return_val_if_fail (GSF_IS_INPUT (source), NULL);
220
221 gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
222 "source", source,
223 NULL);
224
225 if (gzip->err) {
226 if (err)
227 *err = g_error_copy (gzip->err);
228 g_object_unref (gzip);
229 return NULL;
230 }
231 gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));
232
233 return GSF_INPUT (gzip);
234 }
235
236 static void
gsf_input_gzip_finalize(GObject * obj)237 gsf_input_gzip_finalize (GObject *obj)
238 {
239 GsfInputGZip *gzip = (GsfInputGZip *)obj;
240
241 gsf_input_gzip_set_source (gzip, NULL);
242
243 g_free (gzip->buf);
244
245 if (gzip->stream.state != NULL)
246 inflateEnd (&(gzip->stream));
247
248 g_clear_error (&gzip->err);
249
250 parent_class->finalize (obj);
251 }
252
253 static GsfInput *
gsf_input_gzip_dup(GsfInput * src_input,GError ** err)254 gsf_input_gzip_dup (GsfInput *src_input, GError **err)
255 {
256 GsfInputGZip const *src = (GsfInputGZip *)src_input;
257 GsfInputGZip *dst;
258 GsfInput *src_source_copy;
259
260 if (src->source) {
261 src_source_copy = gsf_input_dup (src->source, err);
262 if (err)
263 return NULL;
264 } else
265 src_source_copy = NULL;
266
267 dst = g_object_new (GSF_INPUT_GZIP_TYPE,
268 "source", src_source_copy,
269 "raw", src->raw,
270 NULL);
271
272 if (src_source_copy)
273 g_object_unref (src_source_copy);
274
275 if (src->err) {
276 g_clear_error (&dst->err);
277 dst->err = g_error_copy (src->err);
278 } else if (dst->err) {
279 if (err)
280 *err = g_error_copy (dst->err);
281 g_object_unref (dst);
282 return NULL;
283 }
284
285 return GSF_INPUT (dst);
286 }
287
288 static guint8 const *
gsf_input_gzip_read(GsfInput * input,size_t num_bytes,guint8 * buffer)289 gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
290 {
291 GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
292
293 if (buffer == NULL) {
294 if (gzip->buf_size < num_bytes) {
295 gzip->buf_size = MAX (num_bytes, 256);
296 g_free (gzip->buf);
297 gzip->buf = g_new (guint8, gzip->buf_size);
298 }
299 buffer = gzip->buf;
300 }
301
302 gzip->stream.next_out = buffer;
303 gzip->stream.avail_out = num_bytes;
304 while (gzip->stream.avail_out != 0) {
305 int zerr;
306 if (gzip->stream.avail_in == 0) {
307 gsf_off_t remain = gsf_input_remaining (gzip->source);
308 if (remain <= gzip->trailer_size) {
309 if (remain < gzip->trailer_size || gzip->stop_byte_added) {
310 g_clear_error (&gzip->err);
311 gzip->err = g_error_new
312 (gsf_input_error_id (), 0,
313 _("truncated source"));
314 return NULL;
315 }
316 /* zlib requires an extra byte. */
317 gzip->stream.avail_in = 1;
318 gzip->gzipped_data = "";
319 gzip->stop_byte_added = TRUE;
320 } else {
321 size_t n = MIN (remain - gzip->trailer_size,
322 Z_BUFSIZE);
323
324 gzip->gzipped_data =
325 gsf_input_read (gzip->source, n, NULL);
326 if (!gzip->gzipped_data) {
327 g_clear_error (&gzip->err);
328 gzip->err = g_error_new
329 (gsf_input_error_id (), 0,
330 _("Failed to read from source"));
331 return NULL;
332 }
333 gzip->stream.avail_in = n;
334 }
335 gzip->stream.next_in = (Byte *)gzip->gzipped_data;
336 }
337 zerr = inflate (&(gzip->stream), Z_NO_FLUSH);
338 if (zerr != Z_OK) {
339 if (zerr != Z_STREAM_END)
340 return NULL;
341 /* Premature end of stream. */
342 if (gzip->stream.avail_out != 0)
343 return NULL;
344 }
345 }
346
347 gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer));
348 return buffer;
349 }
350
351 static gboolean
gsf_input_gzip_seek(GsfInput * input,gsf_off_t offset,GSeekType whence)352 gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
353 {
354 GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
355 /* Global flag -- we don't want one per stream. */
356 static gboolean warned = FALSE;
357 gsf_off_t pos = offset;
358
359 /* Note, that pos has already been sanity checked. */
360 switch (whence) {
361 case G_SEEK_SET : break;
362 case G_SEEK_CUR : pos += input->cur_offset; break;
363 case G_SEEK_END : pos += input->size; break;
364 default : return TRUE;
365 }
366
367 if (pos < input->cur_offset) {
368 if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
369 return TRUE;
370 gzip->crc = crc32 (0L, Z_NULL, 0);
371 gzip->stream.avail_in = 0;
372 if (inflateReset (&(gzip->stream)) != Z_OK)
373 return TRUE;
374 input->cur_offset = 0;
375 }
376
377 if (gsf_input_seek_emulate (input, pos))
378 return TRUE;
379
380 gzip->seek_skipped += pos;
381 if (!warned &&
382 gzip->seek_skipped != pos && /* Don't warn for single seek. */
383 gzip->seek_skipped >= 1000000) {
384 warned = TRUE;
385 g_warning ("Seeking in gzipped streams is awfully slow.");
386 }
387
388 return FALSE;
389 }
390
391 static void
gsf_input_gzip_init(GObject * obj)392 gsf_input_gzip_init (GObject *obj)
393 {
394 GsfInputGZip *gzip = GSF_INPUT_GZIP (obj);
395
396 gzip->source = NULL;
397 gzip->raw = FALSE;
398 gzip->uncompressed_size = -1;
399 gzip->err = NULL;
400 gzip->stream.zalloc = (alloc_func)0;
401 gzip->stream.zfree = (free_func)0;
402 gzip->stream.opaque = (voidpf)0;
403 gzip->stream.next_in = Z_NULL;
404 gzip->stream.next_out = Z_NULL;
405 gzip->stream.avail_in = gzip->stream.avail_out = 0;
406 gzip->crc = crc32 (0L, Z_NULL, 0);
407 gzip->buf = NULL;
408 gzip->buf_size = 0;
409 gzip->seek_skipped = 0;
410 }
411
412 static void
gsf_input_gzip_get_property(GObject * object,guint property_id,GValue * value,GParamSpec * pspec)413 gsf_input_gzip_get_property (GObject *object,
414 guint property_id,
415 GValue *value,
416 GParamSpec *pspec)
417 {
418 GsfInputGZip *gzip = (GsfInputGZip *)object;
419
420 switch (property_id) {
421 case PROP_RAW:
422 g_value_set_boolean (value, gzip->raw);
423 break;
424 case PROP_SOURCE:
425 g_value_set_object (value, gzip->source);
426 break;
427 case PROP_UNCOMPRESSED_SIZE:
428 g_value_set_int64 (value, gzip->uncompressed_size);
429 break;
430 default:
431 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
432 break;
433 }
434 }
435
436 static void
gsf_input_gzip_set_property(GObject * object,guint property_id,GValue const * value,GParamSpec * pspec)437 gsf_input_gzip_set_property (GObject *object,
438 guint property_id,
439 GValue const *value,
440 GParamSpec *pspec)
441 {
442 GsfInputGZip *gzip = (GsfInputGZip *)object;
443
444 switch (property_id) {
445 case PROP_RAW:
446 gzip->raw = g_value_get_boolean (value);
447 break;
448 case PROP_SOURCE:
449 gsf_input_gzip_set_source (gzip, g_value_get_object (value));
450 break;
451 case PROP_UNCOMPRESSED_SIZE:
452 gzip->uncompressed_size = g_value_get_int64 (value);
453 break;
454 default:
455 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
456 break;
457 }
458 }
459
460 static GObject*
gsf_input_gzip_constructor(GType type,guint n_construct_properties,GObjectConstructParam * construct_params)461 gsf_input_gzip_constructor (GType type,
462 guint n_construct_properties,
463 GObjectConstructParam *construct_params)
464 {
465 GsfInputGZip *gzip;
466
467 gzip = (GsfInputGZip *)(parent_class->constructor (type,
468 n_construct_properties,
469 construct_params));
470
471 if (!gzip->source) {
472 g_clear_error (&gzip->err);
473 gzip->err = g_error_new (gsf_input_error_id (), 0,
474 _("NULL source"));
475 } else if (gzip->raw && gzip->uncompressed_size < 0) {
476 g_clear_error (&gzip->err);
477 gzip->err = g_error_new (gsf_input_error_id (), 0,
478 _("Uncompressed size not set"));
479 } else if (init_zip (gzip, &gzip->err) != FALSE) {
480 /* Nothing more. */
481 }
482
483 return (GObject *)gzip;
484 }
485
486 static void
gsf_input_gzip_class_init(GObjectClass * gobject_class)487 gsf_input_gzip_class_init (GObjectClass *gobject_class)
488 {
489 GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class);
490
491 gobject_class->constructor = gsf_input_gzip_constructor;
492 gobject_class->finalize = gsf_input_gzip_finalize;
493 gobject_class->set_property = gsf_input_gzip_set_property;
494 gobject_class->get_property = gsf_input_gzip_get_property;
495 input_class->Dup = gsf_input_gzip_dup;
496 input_class->Read = gsf_input_gzip_read;
497 input_class->Seek = gsf_input_gzip_seek;
498
499 g_object_class_install_property
500 (gobject_class,
501 PROP_RAW,
502 g_param_spec_boolean ("raw",
503 _("Raw"),
504 _("Whether to read compressed data with no header and no trailer"),
505 FALSE,
506 G_PARAM_STATIC_STRINGS |
507 G_PARAM_READWRITE |
508 G_PARAM_CONSTRUCT_ONLY));
509
510 g_object_class_install_property
511 (gobject_class,
512 PROP_SOURCE,
513 g_param_spec_object ("source",
514 _("Source"),
515 _("Where the compressed data comes from"),
516 GSF_INPUT_TYPE,
517 G_PARAM_STATIC_STRINGS |
518 G_PARAM_READWRITE |
519 G_PARAM_CONSTRUCT_ONLY));
520 /**
521 * GsfInputGzip:uncompressed_size:
522 *
523 * The size that the data will have after uncompression.
524 * The is mandatory for raw streams and if the uncompressed size is
525 * larger than 4GB.
526 */
527 g_object_class_install_property
528 (gobject_class,
529 PROP_UNCOMPRESSED_SIZE,
530 g_param_spec_int64 ("uncompressed-size",
531 _("Size after decompression"),
532 _("The source's uncompressed size"),
533 -1, G_MAXINT64, -1,
534 G_PARAM_STATIC_STRINGS |
535 G_PARAM_READWRITE |
536 G_PARAM_CONSTRUCT_ONLY));
537
538 parent_class = g_type_class_peek_parent (gobject_class);
539 }
540
541 GSF_CLASS (GsfInputGZip, gsf_input_gzip,
542 gsf_input_gzip_class_init, gsf_input_gzip_init,
543 GSF_INPUT_TYPE)
544
545