1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  * gsf-input-gzip.c: wrapper to uncompress gzipped input
4  *
5  * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org)
6  * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org)
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of version 2.1 of the GNU Lesser General Public
10  * License as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <gsf-config.h>
24 #include <gsf/gsf-input-gzip.h>
25 #include <gsf/gsf.h>
26 #include <glib/gi18n-lib.h>
27 
28 #include <zlib.h>
29 #include <string.h>
30 
31 #define Z_BUFSIZE 0x100
32 
33 static GObjectClass *parent_class;
34 
35 struct _GsfInputGZip {
36 	GsfInput input;
37 
38 	GsfInput *source; /* compressed data */
39 	gboolean raw; /* No header and no trailer.  */
40 	GError *err;
41 	gsf_off_t uncompressed_size;
42 	gboolean stop_byte_added;
43 
44 	z_stream  stream;
45 	guint8 const *gzipped_data;
46 	uLong     crc;     /* crc32 of uncompressed data */
47 
48 	guint8   *buf;
49 	size_t    buf_size;
50 
51 	gsf_off_t header_size, trailer_size;
52 	gsf_off_t seek_skipped;
53 };
54 
55 typedef struct {
56 	GsfInputClass input_class;
57 } GsfInputGZipClass;
58 
/* GObject property identifiers; 0 is kept unused as PROP_0. */
enum {
	PROP_0 = 0,
	PROP_RAW = 1,
	PROP_SOURCE = 2,
	PROP_UNCOMPRESSED_SIZE = 3
};
65 
/* Bits of the flag byte (FLG) in a gzip member header, see RFC 1952.  */
#define GZIP_IS_ASCII		0x01 /* FTEXT: the content is probably text */
#define GZIP_HEADER_CRC		0x02 /* FHCRC: a 16-bit header CRC follows */
#define GZIP_EXTRA_FIELD	0x04 /* FEXTRA: an 'extra' field is present */
#define GZIP_ORIGINAL_NAME	0x08 /* FNAME: the original file name is stored */
#define GZIP_HAS_COMMENT	0x10 /* FCOMMENT: a comment is stored */
/* Every flag bit we understand; anything else makes the header invalid.  */
#define GZIP_HEADER_FLAGS			\
	((unsigned)(GZIP_IS_ASCII |		\
		    GZIP_HEADER_CRC |		\
		    GZIP_EXTRA_FIELD |		\
		    GZIP_ORIGINAL_NAME |	\
		    GZIP_HAS_COMMENT))
73 
74 static gboolean
check_header(GsfInputGZip * input)75 check_header (GsfInputGZip *input)
76 {
77 	if (input->raw) {
78 		input->header_size = 0;
79 		input->trailer_size = 0;
80 	} else {
81 		static guint8 const signature[2] = {0x1f, 0x8b};
82 		guint8 const *data;
83 		unsigned flags, len;
84 		guint32 modutime;
85 
86 		/* Check signature */
87 		if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) ||
88 		    0 != memcmp (data, signature, sizeof (signature)))
89 			return TRUE;
90 
91 		/* verify flags and compression type */
92 		flags  = data[3];
93 		if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0)
94 			return TRUE;
95 
96 		modutime = GSF_LE_GET_GUINT32 (data + 4);
97 		if (modutime != 0) {
98 			GDateTime *modtime = g_date_time_new_from_unix_utc (modutime);
99 			gsf_input_set_modtime (GSF_INPUT (input), modtime);
100 			g_date_time_unref (modtime);
101 		}
102 
103 		/* If we have the size, don't bother seeking to the end.  */
104 		if (input->uncompressed_size < 0) {
105 			/* Get the uncompressed size */
106 			if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) ||
107 			    NULL == (data = gsf_input_read (input->source, 4, NULL)))
108 				return TRUE;
109 			/* FIXME, but how?  The size read here is modulo 2^32.  */
110 			input->uncompressed_size = GSF_LE_GET_GUINT32 (data);
111 
112 			if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) {
113 				g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n"
114 					   "It is probably truncated or corrupt");
115 			}
116 		}
117 
118 		if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET))
119 			return TRUE;
120 
121 		if (flags & GZIP_EXTRA_FIELD) {
122 			if (NULL == (data = gsf_input_read (input->source, 2, NULL)))
123 				return TRUE;
124 			len = GSF_LE_GET_GUINT16 (data);
125 			if (NULL == gsf_input_read (input->source, len, NULL))
126 				return TRUE;
127 		}
128 		if (flags & GZIP_ORIGINAL_NAME) {
129 			/* Skip over the filename (which is in ISO 8859-1 encoding).  */
130 			do {
131 				if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
132 					return TRUE;
133 			} while (*data != 0);
134 		}
135 
136 		if (flags & GZIP_HAS_COMMENT) {
137 			/* Skip over the comment (which is in ISO 8859-1 encoding).  */
138 			do {
139 				if (NULL == (data = gsf_input_read (input->source, 1, NULL)))
140 					return TRUE;
141 			} while (*data != 0);
142 		}
143 
144 		if (flags & GZIP_HEADER_CRC &&
145 		    NULL == (data = gsf_input_read (input->source, 2, NULL)))
146 			return TRUE;
147 
148 		input->header_size = input->source->cur_offset;
149 		/* the last 8 bytes are the crc and size.  */
150 		input->trailer_size = 8;
151 	}
152 
153 	gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size);
154 
155 	if (gsf_input_remaining (input->source) < input->trailer_size)
156 		return TRUE;	/* No room for payload */
157 
158 	return FALSE;
159 }
160 
161 static gboolean
init_zip(GsfInputGZip * gzip,GError ** err)162 init_zip (GsfInputGZip *gzip, GError **err)
163 {
164 	gsf_off_t cur_pos;
165 
166 	if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) {
167 		if (err != NULL)
168 			*err = g_error_new (gsf_input_error_id (), 0,
169 					    _("Unable to initialize zlib"));
170 		return TRUE;
171 	}
172 
173 	cur_pos = gsf_input_tell (gzip->source);
174 	if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) {
175 		if (err)
176 			*err = g_error_new (gsf_input_error_id (), 0,
177 					    _("Failed to rewind source"));
178 		return TRUE;
179 	}
180 
181 	if (check_header (gzip) != FALSE) {
182 		if (err != NULL)
183 			*err = g_error_new (gsf_input_error_id (), 0,
184 					    _("Invalid gzip header"));
185 		if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) {
186 			g_warning ("attempt to restore position failed ??");
187 		}
188 		return TRUE;
189 	}
190 
191 	return FALSE;
192 }
193 
194 static void
gsf_input_gzip_set_source(GsfInputGZip * gzip,GsfInput * source)195 gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source)
196 {
197 	if (source)
198 		g_object_ref (GSF_INPUT (source));
199 	if (gzip->source)
200 		g_object_unref (gzip->source);
201 	gzip->source = source;
202 }
203 
204 
205 /**
206  * gsf_input_gzip_new:
207  * @source: The underlying data source.
208  * @err: (allow-none): place to store a #GError if anything goes wrong
209  *
210  * Adds a reference to @source.
211  *
212  * Returns: a new file or %NULL.
213  **/
214 GsfInput *
gsf_input_gzip_new(GsfInput * source,GError ** err)215 gsf_input_gzip_new (GsfInput *source, GError **err)
216 {
217 	GsfInputGZip *gzip;
218 
219 	g_return_val_if_fail (GSF_IS_INPUT (source), NULL);
220 
221 	gzip = g_object_new (GSF_INPUT_GZIP_TYPE,
222 			     "source", source,
223 			     NULL);
224 
225 	if (gzip->err) {
226 		if (err)
227 			*err = g_error_copy (gzip->err);
228 		g_object_unref (gzip);
229 		return NULL;
230 	}
231 	gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source));
232 
233 	return GSF_INPUT (gzip);
234 }
235 
236 static void
gsf_input_gzip_finalize(GObject * obj)237 gsf_input_gzip_finalize (GObject *obj)
238 {
239 	GsfInputGZip *gzip = (GsfInputGZip *)obj;
240 
241 	gsf_input_gzip_set_source (gzip, NULL);
242 
243 	g_free (gzip->buf);
244 
245 	if (gzip->stream.state != NULL)
246 		inflateEnd (&(gzip->stream));
247 
248 	g_clear_error (&gzip->err);
249 
250 	parent_class->finalize (obj);
251 }
252 
253 static GsfInput *
gsf_input_gzip_dup(GsfInput * src_input,GError ** err)254 gsf_input_gzip_dup (GsfInput *src_input, GError **err)
255 {
256 	GsfInputGZip const *src = (GsfInputGZip *)src_input;
257 	GsfInputGZip *dst;
258 	GsfInput *src_source_copy;
259 
260 	if (src->source) {
261 		src_source_copy = gsf_input_dup (src->source, err);
262 		if (err)
263 			return NULL;
264 	} else
265 		src_source_copy = NULL;
266 
267 	dst = g_object_new (GSF_INPUT_GZIP_TYPE,
268 			    "source", src_source_copy,
269 			    "raw", src->raw,
270 			    NULL);
271 
272 	if (src_source_copy)
273 		g_object_unref (src_source_copy);
274 
275 	if (src->err) {
276 		g_clear_error (&dst->err);
277 		dst->err = g_error_copy (src->err);
278 	} else if (dst->err) {
279 		if (err)
280 			*err = g_error_copy (dst->err);
281 		g_object_unref (dst);
282 		return NULL;
283 	}
284 
285 	return GSF_INPUT (dst);
286 }
287 
288 static guint8 const *
gsf_input_gzip_read(GsfInput * input,size_t num_bytes,guint8 * buffer)289 gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer)
290 {
291 	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
292 
293 	if (buffer == NULL) {
294 		if (gzip->buf_size < num_bytes) {
295 			gzip->buf_size = MAX (num_bytes, 256);
296 			g_free (gzip->buf);
297 			gzip->buf = g_new (guint8, gzip->buf_size);
298 		}
299 		buffer = gzip->buf;
300 	}
301 
302 	gzip->stream.next_out = buffer;
303 	gzip->stream.avail_out = num_bytes;
304 	while (gzip->stream.avail_out != 0) {
305 		int zerr;
306 		if (gzip->stream.avail_in == 0) {
307 			gsf_off_t remain = gsf_input_remaining (gzip->source);
308 			if (remain <= gzip->trailer_size) {
309 				if (remain < gzip->trailer_size || gzip->stop_byte_added) {
310 					g_clear_error (&gzip->err);
311 					gzip->err = g_error_new
312 						(gsf_input_error_id (), 0,
313 						 _("truncated source"));
314 					return NULL;
315 				}
316 				/* zlib requires an extra byte.  */
317 				gzip->stream.avail_in = 1;
318 				gzip->gzipped_data = "";
319 				gzip->stop_byte_added = TRUE;
320 			} else {
321 				size_t n = MIN (remain - gzip->trailer_size,
322 						Z_BUFSIZE);
323 
324 				gzip->gzipped_data =
325 					gsf_input_read (gzip->source, n, NULL);
326 				if (!gzip->gzipped_data) {
327 					g_clear_error (&gzip->err);
328 					gzip->err = g_error_new
329 						(gsf_input_error_id (), 0,
330 						 _("Failed to read from source"));
331 					return NULL;
332 				}
333 				gzip->stream.avail_in = n;
334 			}
335 			gzip->stream.next_in = (Byte *)gzip->gzipped_data;
336 		}
337 		zerr = inflate (&(gzip->stream), Z_NO_FLUSH);
338 		if (zerr != Z_OK) {
339 			if (zerr != Z_STREAM_END)
340 				return NULL;
341 			/* Premature end of stream.  */
342 			if (gzip->stream.avail_out != 0)
343 				return NULL;
344 		}
345 	}
346 
347 	gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer));
348 	return buffer;
349 }
350 
351 static gboolean
gsf_input_gzip_seek(GsfInput * input,gsf_off_t offset,GSeekType whence)352 gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence)
353 {
354 	GsfInputGZip *gzip = GSF_INPUT_GZIP (input);
355 	/* Global flag -- we don't want one per stream.  */
356 	static gboolean warned = FALSE;
357 	gsf_off_t pos = offset;
358 
359 	/* Note, that pos has already been sanity checked.  */
360 	switch (whence) {
361 	case G_SEEK_SET : break;
362 	case G_SEEK_CUR : pos += input->cur_offset;	break;
363 	case G_SEEK_END : pos += input->size;		break;
364 	default : return TRUE;
365 	}
366 
367 	if (pos < input->cur_offset) {
368 		if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET))
369 			return TRUE;
370 		gzip->crc = crc32 (0L, Z_NULL, 0);
371 		gzip->stream.avail_in = 0;
372 		if (inflateReset (&(gzip->stream)) != Z_OK)
373 			return TRUE;
374 		input->cur_offset = 0;
375 	}
376 
377 	if (gsf_input_seek_emulate (input, pos))
378 		return TRUE;
379 
380 	gzip->seek_skipped += pos;
381 	if (!warned &&
382 	    gzip->seek_skipped != pos && /* Don't warn for single seek.  */
383 	    gzip->seek_skipped >= 1000000) {
384 		warned = TRUE;
385 		g_warning ("Seeking in gzipped streams is awfully slow.");
386 	}
387 
388 	return FALSE;
389 }
390 
391 static void
gsf_input_gzip_init(GObject * obj)392 gsf_input_gzip_init (GObject *obj)
393 {
394 	GsfInputGZip *gzip = GSF_INPUT_GZIP (obj);
395 
396 	gzip->source = NULL;
397 	gzip->raw = FALSE;
398 	gzip->uncompressed_size = -1;
399 	gzip->err = NULL;
400 	gzip->stream.zalloc	= (alloc_func)0;
401 	gzip->stream.zfree	= (free_func)0;
402 	gzip->stream.opaque	= (voidpf)0;
403 	gzip->stream.next_in	= Z_NULL;
404 	gzip->stream.next_out	= Z_NULL;
405 	gzip->stream.avail_in	= gzip->stream.avail_out = 0;
406 	gzip->crc		= crc32 (0L, Z_NULL, 0);
407 	gzip->buf		= NULL;
408 	gzip->buf_size		= 0;
409 	gzip->seek_skipped = 0;
410 }
411 
412 static void
gsf_input_gzip_get_property(GObject * object,guint property_id,GValue * value,GParamSpec * pspec)413 gsf_input_gzip_get_property (GObject     *object,
414 			     guint        property_id,
415 			     GValue      *value,
416 			     GParamSpec  *pspec)
417 {
418 	GsfInputGZip *gzip = (GsfInputGZip *)object;
419 
420 	switch (property_id) {
421 	case PROP_RAW:
422 		g_value_set_boolean (value, gzip->raw);
423 		break;
424 	case PROP_SOURCE:
425 		g_value_set_object (value, gzip->source);
426 		break;
427 	case PROP_UNCOMPRESSED_SIZE:
428 		g_value_set_int64 (value, gzip->uncompressed_size);
429 		break;
430 	default:
431 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
432 		break;
433 	}
434 }
435 
436 static void
gsf_input_gzip_set_property(GObject * object,guint property_id,GValue const * value,GParamSpec * pspec)437 gsf_input_gzip_set_property (GObject      *object,
438 			     guint         property_id,
439 			     GValue const *value,
440 			     GParamSpec   *pspec)
441 {
442 	GsfInputGZip *gzip = (GsfInputGZip *)object;
443 
444 	switch (property_id) {
445 	case PROP_RAW:
446 		gzip->raw = g_value_get_boolean (value);
447 		break;
448 	case PROP_SOURCE:
449 		gsf_input_gzip_set_source (gzip, g_value_get_object (value));
450 		break;
451 	case PROP_UNCOMPRESSED_SIZE:
452 		gzip->uncompressed_size = g_value_get_int64 (value);
453 		break;
454 	default:
455 		G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
456 		break;
457 	}
458 }
459 
460 static GObject*
gsf_input_gzip_constructor(GType type,guint n_construct_properties,GObjectConstructParam * construct_params)461 gsf_input_gzip_constructor (GType                  type,
462 			    guint                  n_construct_properties,
463 			    GObjectConstructParam *construct_params)
464 {
465   GsfInputGZip *gzip;
466 
467   gzip = (GsfInputGZip *)(parent_class->constructor (type,
468 						     n_construct_properties,
469 						     construct_params));
470 
471   if (!gzip->source) {
472 	  g_clear_error (&gzip->err);
473 	  gzip->err = g_error_new (gsf_input_error_id (), 0,
474 				   _("NULL source"));
475   } else if (gzip->raw && gzip->uncompressed_size < 0) {
476 	  g_clear_error (&gzip->err);
477 	  gzip->err = g_error_new (gsf_input_error_id (), 0,
478 				   _("Uncompressed size not set"));
479   } else if (init_zip (gzip, &gzip->err) != FALSE) {
480 	  /* Nothing more.  */
481   }
482 
483   return (GObject *)gzip;
484 }
485 
486 static void
gsf_input_gzip_class_init(GObjectClass * gobject_class)487 gsf_input_gzip_class_init (GObjectClass *gobject_class)
488 {
489 	GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class);
490 
491 	gobject_class->constructor  = gsf_input_gzip_constructor;
492 	gobject_class->finalize     = gsf_input_gzip_finalize;
493 	gobject_class->set_property = gsf_input_gzip_set_property;
494 	gobject_class->get_property = gsf_input_gzip_get_property;
495 	input_class->Dup	    = gsf_input_gzip_dup;
496 	input_class->Read	    = gsf_input_gzip_read;
497 	input_class->Seek	    = gsf_input_gzip_seek;
498 
499 	g_object_class_install_property
500 		(gobject_class,
501 		 PROP_RAW,
502 		 g_param_spec_boolean ("raw",
503 				       _("Raw"),
504 				       _("Whether to read compressed data with no header and no trailer"),
505 				       FALSE,
506 				       G_PARAM_STATIC_STRINGS |
507 				       G_PARAM_READWRITE |
508 				       G_PARAM_CONSTRUCT_ONLY));
509 
510 	g_object_class_install_property
511 		(gobject_class,
512 		 PROP_SOURCE,
513 		 g_param_spec_object ("source",
514 				      _("Source"),
515 				      _("Where the compressed data comes from"),
516 				      GSF_INPUT_TYPE,
517 				      G_PARAM_STATIC_STRINGS |
518 				      G_PARAM_READWRITE |
519 				      G_PARAM_CONSTRUCT_ONLY));
520 	/**
521 	 * GsfInputGzip:uncompressed_size:
522 	 *
523 	 * The size that the data will have after uncompression.
524 	 * The is mandatory for raw streams and if the uncompressed size is
525 	 * larger than 4GB.
526 	 */
527 	g_object_class_install_property
528 		(gobject_class,
529 		 PROP_UNCOMPRESSED_SIZE,
530 		 g_param_spec_int64 ("uncompressed-size",
531 				     _("Size after decompression"),
532 				     _("The source's uncompressed size"),
533 				     -1, G_MAXINT64, -1,
534 				     G_PARAM_STATIC_STRINGS |
535 				     G_PARAM_READWRITE |
536 				     G_PARAM_CONSTRUCT_ONLY));
537 
538 	parent_class = g_type_class_peek_parent (gobject_class);
539 }
540 
/*
 * Type boilerplate for GsfInputGZip: names the class and instance
 * initialisers defined above and GSF_INPUT_TYPE as the parent type.
 * NOTE(review): presumably expands to gsf_input_gzip_get_type() --
 * confirm against the GSF_CLASS definition in the libgsf headers.
 */
GSF_CLASS (GsfInputGZip, gsf_input_gzip,
	   gsf_input_gzip_class_init, gsf_input_gzip_init,
	   GSF_INPUT_TYPE)
544 
545