/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ /* * gsf-input-gzip.c: wrapper to uncompress gzipped input * * Copyright (C) 2002-2006 Jody Goldberg (jody@gnome.org) * Copyright (C) 2005-2006 Morten Welinder (terra@gnome.org) * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2.1 of the GNU Lesser General Public * License as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ #include #include #include #include #include #include #define Z_BUFSIZE 0x100 static GObjectClass *parent_class; struct _GsfInputGZip { GsfInput input; GsfInput *source; /* compressed data */ gboolean raw; /* No header and no trailer. */ GError *err; gsf_off_t uncompressed_size; gboolean stop_byte_added; z_stream stream; guint8 const *gzipped_data; uLong crc; /* crc32 of uncompressed data */ guint8 *buf; size_t buf_size; gsf_off_t header_size, trailer_size; gsf_off_t seek_skipped; }; typedef struct { GsfInputClass input_class; } GsfInputGZipClass; enum { PROP_0, PROP_RAW, PROP_SOURCE, PROP_UNCOMPRESSED_SIZE }; /* gzip flag byte */ #define GZIP_IS_ASCII 0x01 /* file contains text ? */ #define GZIP_HEADER_CRC 0x02 /* there is a CRC in the header */ #define GZIP_EXTRA_FIELD 0x04 /* there is an 'extra' field */ #define GZIP_ORIGINAL_NAME 0x08 /* the original is stored */ #define GZIP_HAS_COMMENT 0x10 /* There is a comment in the header */ #define GZIP_HEADER_FLAGS (unsigned)(GZIP_IS_ASCII |GZIP_HEADER_CRC |GZIP_EXTRA_FIELD |GZIP_ORIGINAL_NAME |GZIP_HAS_COMMENT) static gboolean check_header (GsfInputGZip *input) { if (input->raw) { input->header_size = 0; input->trailer_size = 0; } else { static guint8 const signature[2] = {0x1f, 0x8b}; guint8 const *data; unsigned flags, len; guint32 modutime; /* Check signature */ if (NULL == (data = gsf_input_read (input->source, 2 + 1 + 1 + 6, NULL)) || 0 != memcmp (data, signature, sizeof (signature))) return TRUE; /* verify flags and compression type */ flags = data[3]; if (data[2] != Z_DEFLATED || (flags & ~GZIP_HEADER_FLAGS) != 0) return TRUE; modutime = GSF_LE_GET_GUINT32 (data + 4); if (modutime != 0) { GDateTime *modtime = g_date_time_new_from_unix_utc (modutime); gsf_input_set_modtime (GSF_INPUT (input), modtime); g_date_time_unref (modtime); } /* If we have the size, don't bother seeking to the end. */ if (input->uncompressed_size < 0) { /* Get the uncompressed size */ if (gsf_input_seek (input->source, (gsf_off_t) -4, G_SEEK_END) || NULL == (data = gsf_input_read (input->source, 4, NULL))) return TRUE; /* FIXME, but how? The size read here is modulo 2^32. */ input->uncompressed_size = GSF_LE_GET_GUINT32 (data); if (input->uncompressed_size / 1000 > gsf_input_size (input->source)) { g_warning ("Suspiciously well compressed file with better than 1000:1 ratio.\n" "It is probably truncated or corrupt"); } } if (gsf_input_seek (input->source, 2 + 1 + 1 + 6, G_SEEK_SET)) return TRUE; if (flags & GZIP_EXTRA_FIELD) { if (NULL == (data = gsf_input_read (input->source, 2, NULL))) return TRUE; len = GSF_LE_GET_GUINT16 (data); if (NULL == gsf_input_read (input->source, len, NULL)) return TRUE; } if (flags & GZIP_ORIGINAL_NAME) { /* Skip over the filename (which is in ISO 8859-1 encoding). */ do { if (NULL == (data = gsf_input_read (input->source, 1, NULL))) return TRUE; } while (*data != 0); } if (flags & GZIP_HAS_COMMENT) { /* Skip over the comment (which is in ISO 8859-1 encoding). */ do { if (NULL == (data = gsf_input_read (input->source, 1, NULL))) return TRUE; } while (*data != 0); } if (flags & GZIP_HEADER_CRC && NULL == (data = gsf_input_read (input->source, 2, NULL))) return TRUE; input->header_size = input->source->cur_offset; /* the last 8 bytes are the crc and size. */ input->trailer_size = 8; } gsf_input_set_size (GSF_INPUT (input), input->uncompressed_size); if (gsf_input_remaining (input->source) < input->trailer_size) return TRUE; /* No room for payload */ return FALSE; } static gboolean init_zip (GsfInputGZip *gzip, GError **err) { gsf_off_t cur_pos; if (Z_OK != inflateInit2 (&(gzip->stream), -MAX_WBITS)) { if (err != NULL) *err = g_error_new (gsf_input_error_id (), 0, _("Unable to initialize zlib")); return TRUE; } cur_pos = gsf_input_tell (gzip->source); if (gsf_input_seek (gzip->source, 0, G_SEEK_SET)) { if (err) *err = g_error_new (gsf_input_error_id (), 0, _("Failed to rewind source")); return TRUE; } if (check_header (gzip) != FALSE) { if (err != NULL) *err = g_error_new (gsf_input_error_id (), 0, _("Invalid gzip header")); if (gsf_input_seek (gzip->source, cur_pos, G_SEEK_SET)) { g_warning ("attempt to restore position failed ??"); } return TRUE; } return FALSE; } static void gsf_input_gzip_set_source (GsfInputGZip *gzip, GsfInput *source) { if (source) g_object_ref (GSF_INPUT (source)); if (gzip->source) g_object_unref (gzip->source); gzip->source = source; } /** * gsf_input_gzip_new: * @source: The underlying data source. * @err: (allow-none): place to store a #GError if anything goes wrong * * Adds a reference to @source. * * Returns: a new file or %NULL. **/ GsfInput * gsf_input_gzip_new (GsfInput *source, GError **err) { GsfInputGZip *gzip; g_return_val_if_fail (GSF_IS_INPUT (source), NULL); gzip = g_object_new (GSF_INPUT_GZIP_TYPE, "source", source, NULL); if (gzip->err) { if (err) *err = g_error_copy (gzip->err); g_object_unref (gzip); return NULL; } gsf_input_set_name (GSF_INPUT (gzip), gsf_input_name (source)); return GSF_INPUT (gzip); } static void gsf_input_gzip_finalize (GObject *obj) { GsfInputGZip *gzip = (GsfInputGZip *)obj; gsf_input_gzip_set_source (gzip, NULL); g_free (gzip->buf); if (gzip->stream.state != NULL) inflateEnd (&(gzip->stream)); g_clear_error (&gzip->err); parent_class->finalize (obj); } static GsfInput * gsf_input_gzip_dup (GsfInput *src_input, GError **err) { GsfInputGZip const *src = (GsfInputGZip *)src_input; GsfInputGZip *dst; GsfInput *src_source_copy; if (src->source) { src_source_copy = gsf_input_dup (src->source, err); if (err) return NULL; } else src_source_copy = NULL; dst = g_object_new (GSF_INPUT_GZIP_TYPE, "source", src_source_copy, "raw", src->raw, NULL); if (src_source_copy) g_object_unref (src_source_copy); if (src->err) { g_clear_error (&dst->err); dst->err = g_error_copy (src->err); } else if (dst->err) { if (err) *err = g_error_copy (dst->err); g_object_unref (dst); return NULL; } return GSF_INPUT (dst); } static guint8 const * gsf_input_gzip_read (GsfInput *input, size_t num_bytes, guint8 *buffer) { GsfInputGZip *gzip = GSF_INPUT_GZIP (input); if (buffer == NULL) { if (gzip->buf_size < num_bytes) { gzip->buf_size = MAX (num_bytes, 256); g_free (gzip->buf); gzip->buf = g_new (guint8, gzip->buf_size); } buffer = gzip->buf; } gzip->stream.next_out = buffer; gzip->stream.avail_out = num_bytes; while (gzip->stream.avail_out != 0) { int zerr; if (gzip->stream.avail_in == 0) { gsf_off_t remain = gsf_input_remaining (gzip->source); if (remain <= gzip->trailer_size) { if (remain < gzip->trailer_size || gzip->stop_byte_added) { g_clear_error (&gzip->err); gzip->err = g_error_new (gsf_input_error_id (), 0, _("truncated source")); return NULL; } /* zlib requires an extra byte. */ gzip->stream.avail_in = 1; gzip->gzipped_data = ""; gzip->stop_byte_added = TRUE; } else { size_t n = MIN (remain - gzip->trailer_size, Z_BUFSIZE); gzip->gzipped_data = gsf_input_read (gzip->source, n, NULL); if (!gzip->gzipped_data) { g_clear_error (&gzip->err); gzip->err = g_error_new (gsf_input_error_id (), 0, _("Failed to read from source")); return NULL; } gzip->stream.avail_in = n; } gzip->stream.next_in = (Byte *)gzip->gzipped_data; } zerr = inflate (&(gzip->stream), Z_NO_FLUSH); if (zerr != Z_OK) { if (zerr != Z_STREAM_END) return NULL; /* Premature end of stream. */ if (gzip->stream.avail_out != 0) return NULL; } } gzip->crc = crc32 (gzip->crc, buffer, (uInt)(gzip->stream.next_out - buffer)); return buffer; } static gboolean gsf_input_gzip_seek (GsfInput *input, gsf_off_t offset, GSeekType whence) { GsfInputGZip *gzip = GSF_INPUT_GZIP (input); /* Global flag -- we don't want one per stream. */ static gboolean warned = FALSE; gsf_off_t pos = offset; /* Note, that pos has already been sanity checked. */ switch (whence) { case G_SEEK_SET : break; case G_SEEK_CUR : pos += input->cur_offset; break; case G_SEEK_END : pos += input->size; break; default : return TRUE; } if (pos < input->cur_offset) { if (gsf_input_seek (gzip->source, gzip->header_size, G_SEEK_SET)) return TRUE; gzip->crc = crc32 (0L, Z_NULL, 0); gzip->stream.avail_in = 0; if (inflateReset (&(gzip->stream)) != Z_OK) return TRUE; input->cur_offset = 0; } if (gsf_input_seek_emulate (input, pos)) return TRUE; gzip->seek_skipped += pos; if (!warned && gzip->seek_skipped != pos && /* Don't warn for single seek. */ gzip->seek_skipped >= 1000000) { warned = TRUE; g_warning ("Seeking in gzipped streams is awfully slow."); } return FALSE; } static void gsf_input_gzip_init (GObject *obj) { GsfInputGZip *gzip = GSF_INPUT_GZIP (obj); gzip->source = NULL; gzip->raw = FALSE; gzip->uncompressed_size = -1; gzip->err = NULL; gzip->stream.zalloc = (alloc_func)0; gzip->stream.zfree = (free_func)0; gzip->stream.opaque = (voidpf)0; gzip->stream.next_in = Z_NULL; gzip->stream.next_out = Z_NULL; gzip->stream.avail_in = gzip->stream.avail_out = 0; gzip->crc = crc32 (0L, Z_NULL, 0); gzip->buf = NULL; gzip->buf_size = 0; gzip->seek_skipped = 0; } static void gsf_input_gzip_get_property (GObject *object, guint property_id, GValue *value, GParamSpec *pspec) { GsfInputGZip *gzip = (GsfInputGZip *)object; switch (property_id) { case PROP_RAW: g_value_set_boolean (value, gzip->raw); break; case PROP_SOURCE: g_value_set_object (value, gzip->source); break; case PROP_UNCOMPRESSED_SIZE: g_value_set_int64 (value, gzip->uncompressed_size); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static void gsf_input_gzip_set_property (GObject *object, guint property_id, GValue const *value, GParamSpec *pspec) { GsfInputGZip *gzip = (GsfInputGZip *)object; switch (property_id) { case PROP_RAW: gzip->raw = g_value_get_boolean (value); break; case PROP_SOURCE: gsf_input_gzip_set_source (gzip, g_value_get_object (value)); break; case PROP_UNCOMPRESSED_SIZE: gzip->uncompressed_size = g_value_get_int64 (value); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; } } static GObject* gsf_input_gzip_constructor (GType type, guint n_construct_properties, GObjectConstructParam *construct_params) { GsfInputGZip *gzip; gzip = (GsfInputGZip *)(parent_class->constructor (type, n_construct_properties, construct_params)); if (!gzip->source) { g_clear_error (&gzip->err); gzip->err = g_error_new (gsf_input_error_id (), 0, _("NULL source")); } else if (gzip->raw && gzip->uncompressed_size < 0) { g_clear_error (&gzip->err); gzip->err = g_error_new (gsf_input_error_id (), 0, _("Uncompressed size not set")); } else if (init_zip (gzip, &gzip->err) != FALSE) { /* Nothing more. */ } return (GObject *)gzip; } static void gsf_input_gzip_class_init (GObjectClass *gobject_class) { GsfInputClass *input_class = GSF_INPUT_CLASS (gobject_class); gobject_class->constructor = gsf_input_gzip_constructor; gobject_class->finalize = gsf_input_gzip_finalize; gobject_class->set_property = gsf_input_gzip_set_property; gobject_class->get_property = gsf_input_gzip_get_property; input_class->Dup = gsf_input_gzip_dup; input_class->Read = gsf_input_gzip_read; input_class->Seek = gsf_input_gzip_seek; g_object_class_install_property (gobject_class, PROP_RAW, g_param_spec_boolean ("raw", _("Raw"), _("Whether to read compressed data with no header and no trailer"), FALSE, G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); g_object_class_install_property (gobject_class, PROP_SOURCE, g_param_spec_object ("source", _("Source"), _("Where the compressed data comes from"), GSF_INPUT_TYPE, G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); /** * GsfInputGzip:uncompressed_size: * * The size that the data will have after uncompression. * The is mandatory for raw streams and if the uncompressed size is * larger than 4GB. */ g_object_class_install_property (gobject_class, PROP_UNCOMPRESSED_SIZE, g_param_spec_int64 ("uncompressed-size", _("Size after decompression"), _("The source's uncompressed size"), -1, G_MAXINT64, -1, G_PARAM_STATIC_STRINGS | G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); parent_class = g_type_class_peek_parent (gobject_class); } GSF_CLASS (GsfInputGZip, gsf_input_gzip, gsf_input_gzip_class_init, gsf_input_gzip_init, GSF_INPUT_TYPE)