xref: /freebsd/sys/contrib/zlib/doc/rfc1952.txt (revision c9083b85)
1c9083b85SXin LI
2c9083b85SXin LI
3c9083b85SXin LI
4c9083b85SXin LI
5c9083b85SXin LI
6c9083b85SXin LI
7c9083b85SXin LINetwork Working Group                                         P. Deutsch
8c9083b85SXin LIRequest for Comments: 1952                           Aladdin Enterprises
9c9083b85SXin LICategory: Informational                                         May 1996
10c9083b85SXin LI
11c9083b85SXin LI
12c9083b85SXin LI               GZIP file format specification version 4.3
13c9083b85SXin LI
14c9083b85SXin LIStatus of This Memo
15c9083b85SXin LI
16c9083b85SXin LI   This memo provides information for the Internet community.  This memo
17c9083b85SXin LI   does not specify an Internet standard of any kind.  Distribution of
18c9083b85SXin LI   this memo is unlimited.
19c9083b85SXin LI
20c9083b85SXin LIIESG Note:
21c9083b85SXin LI
22c9083b85SXin LI   The IESG takes no position on the validity of any Intellectual
23c9083b85SXin LI   Property Rights statements contained in this document.
24c9083b85SXin LI
25c9083b85SXin LINotices
26c9083b85SXin LI
27c9083b85SXin LI   Copyright (c) 1996 L. Peter Deutsch
28c9083b85SXin LI
29c9083b85SXin LI   Permission is granted to copy and distribute this document for any
30c9083b85SXin LI   purpose and without charge, including translations into other
31c9083b85SXin LI   languages and incorporation into compilations, provided that the
32c9083b85SXin LI   copyright notice and this notice are preserved, and that any
33c9083b85SXin LI   substantive changes or deletions from the original are clearly
34c9083b85SXin LI   marked.
35c9083b85SXin LI
36c9083b85SXin LI   A pointer to the latest version of this and related documentation in
37c9083b85SXin LI   HTML format can be found at the URL
38c9083b85SXin LI   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
39c9083b85SXin LI
40c9083b85SXin LIAbstract
41c9083b85SXin LI
42c9083b85SXin LI   This specification defines a lossless compressed data format that is
43c9083b85SXin LI   compatible with the widely used GZIP utility.  The format includes a
44c9083b85SXin LI   cyclic redundancy check value for detecting data corruption.  The
45c9083b85SXin LI   format presently uses the DEFLATE method of compression but can be
46c9083b85SXin LI   easily extended to use other compression methods.  The format can be
47c9083b85SXin LI   implemented readily in a manner not covered by patents.
48c9083b85SXin LI
49c9083b85SXin LI
50c9083b85SXin LI
51c9083b85SXin LI
52c9083b85SXin LI
53c9083b85SXin LI
54c9083b85SXin LI
55c9083b85SXin LI
56c9083b85SXin LI
57c9083b85SXin LI
58c9083b85SXin LIDeutsch                      Informational                      [Page 1]
59c9083b85SXin LI
60c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
61c9083b85SXin LI
62c9083b85SXin LI
63c9083b85SXin LITable of Contents
64c9083b85SXin LI
65c9083b85SXin LI   1. Introduction ................................................... 2
66c9083b85SXin LI      1.1. Purpose ................................................... 2
67c9083b85SXin LI      1.2. Intended audience ......................................... 3
68c9083b85SXin LI      1.3. Scope ..................................................... 3
69c9083b85SXin LI      1.4. Compliance ................................................ 3
70c9083b85SXin LI      1.5. Definitions of terms and conventions used ................. 3
71c9083b85SXin LI      1.6. Changes from previous versions ............................ 3
72c9083b85SXin LI   2. Detailed specification ......................................... 4
73c9083b85SXin LI      2.1. Overall conventions ....................................... 4
74c9083b85SXin LI      2.2. File format ............................................... 5
75c9083b85SXin LI      2.3. Member format ............................................. 5
76c9083b85SXin LI          2.3.1. Member header and trailer ........................... 6
77c9083b85SXin LI              2.3.1.1. Extra field ................................... 8
78c9083b85SXin LI              2.3.1.2. Compliance .................................... 9
79c9083b85SXin LI      3. References .................................................. 9
80c9083b85SXin LI      4. Security Considerations .................................... 10
81c9083b85SXin LI      5. Acknowledgements ........................................... 10
82c9083b85SXin LI      6. Author's Address ........................................... 10
83c9083b85SXin LI      7. Appendix: Jean-Loup Gailly's gzip utility .................. 11
84c9083b85SXin LI      8. Appendix: Sample CRC Code .................................. 11
85c9083b85SXin LI
86c9083b85SXin LI1. Introduction
87c9083b85SXin LI
88c9083b85SXin LI   1.1. Purpose
89c9083b85SXin LI
90c9083b85SXin LI      The purpose of this specification is to define a lossless
91c9083b85SXin LI      compressed data format that:
92c9083b85SXin LI
93c9083b85SXin LI          * Is independent of CPU type, operating system, file system,
94c9083b85SXin LI            and character set, and hence can be used for interchange;
95c9083b85SXin LI          * Can compress or decompress a data stream (as opposed to a
96c9083b85SXin LI            randomly accessible file) to produce another data stream,
97c9083b85SXin LI            using only an a priori bounded amount of intermediate
98c9083b85SXin LI            storage, and hence can be used in data communications or
99c9083b85SXin LI            similar structures such as Unix filters;
100c9083b85SXin LI          * Compresses data with efficiency comparable to the best
101c9083b85SXin LI            currently available general-purpose compression methods,
102c9083b85SXin LI            and in particular considerably better than the "compress"
103c9083b85SXin LI            program;
104c9083b85SXin LI          * Can be implemented readily in a manner not covered by
105c9083b85SXin LI            patents, and hence can be practiced freely;
106c9083b85SXin LI          * Is compatible with the file format produced by the current
107c9083b85SXin LI            widely used gzip utility, in that conforming decompressors
108c9083b85SXin LI            will be able to read data produced by the existing gzip
109c9083b85SXin LI            compressor.
110c9083b85SXin LI
111c9083b85SXin LI
112c9083b85SXin LI
113c9083b85SXin LI
114c9083b85SXin LIDeutsch                      Informational                      [Page 2]
115c9083b85SXin LI
116c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
117c9083b85SXin LI
118c9083b85SXin LI
119c9083b85SXin LI      The data format defined by this specification does not attempt to:
120c9083b85SXin LI
121c9083b85SXin LI          * Provide random access to compressed data;
122c9083b85SXin LI          * Compress specialized data (e.g., raster graphics) as well as
123c9083b85SXin LI            the best currently available specialized algorithms.
124c9083b85SXin LI
125c9083b85SXin LI   1.2. Intended audience
126c9083b85SXin LI
127c9083b85SXin LI      This specification is intended for use by implementors of software
128c9083b85SXin LI      to compress data into gzip format and/or decompress data from gzip
129c9083b85SXin LI      format.
130c9083b85SXin LI
131c9083b85SXin LI      The text of the specification assumes a basic background in
132c9083b85SXin LI      programming at the level of bits and other primitive data
133c9083b85SXin LI      representations.
134c9083b85SXin LI
135c9083b85SXin LI   1.3. Scope
136c9083b85SXin LI
137c9083b85SXin LI      The specification specifies a compression method and a file format
138c9083b85SXin LI      (the latter assuming only that a file can store a sequence of
139c9083b85SXin LI      arbitrary bytes).  It does not specify any particular interface to
140c9083b85SXin LI      a file system or anything about character sets or encodings
141c9083b85SXin LI      (except for file names and comments, which are optional).
142c9083b85SXin LI
143c9083b85SXin LI   1.4. Compliance
144c9083b85SXin LI
145c9083b85SXin LI      Unless otherwise indicated below, a compliant decompressor must be
146c9083b85SXin LI      able to accept and decompress any file that conforms to all the
147c9083b85SXin LI      specifications presented here; a compliant compressor must produce
148c9083b85SXin LI      files that conform to all the specifications presented here.  The
149c9083b85SXin LI      material in the appendices is not part of the specification per se
150c9083b85SXin LI      and is not relevant to compliance.
151c9083b85SXin LI
152c9083b85SXin LI   1.5. Definitions of terms and conventions used
153c9083b85SXin LI
154c9083b85SXin LI      byte: 8 bits stored or transmitted as a unit (same as an octet).
155c9083b85SXin LI      (For this specification, a byte is exactly 8 bits, even on
156c9083b85SXin LI      machines which store a character on a number of bits different
157c9083b85SXin LI      from 8.)  See below for the numbering of bits within a byte.
158c9083b85SXin LI
159c9083b85SXin LI   1.6. Changes from previous versions
160c9083b85SXin LI
161c9083b85SXin LI      There have been no technical changes to the gzip format since
162c9083b85SXin LI      version 4.1 of this specification.  In version 4.2, some
163c9083b85SXin LI      terminology was changed, and the sample CRC code was rewritten for
164c9083b85SXin LI      clarity and to eliminate the requirement for the caller to do pre-
165c9083b85SXin LI      and post-conditioning.  Version 4.3 is a conversion of the
166c9083b85SXin LI      specification to RFC style.
167c9083b85SXin LI
168c9083b85SXin LI
169c9083b85SXin LI
170c9083b85SXin LIDeutsch                      Informational                      [Page 3]
171c9083b85SXin LI
172c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
173c9083b85SXin LI
174c9083b85SXin LI
175c9083b85SXin LI2. Detailed specification
176c9083b85SXin LI
177c9083b85SXin LI   2.1. Overall conventions
178c9083b85SXin LI
179c9083b85SXin LI      In the diagrams below, a box like this:
180c9083b85SXin LI
181c9083b85SXin LI         +---+
182c9083b85SXin LI         |   | <-- the vertical bars might be missing
183c9083b85SXin LI         +---+
184c9083b85SXin LI
185c9083b85SXin LI      represents one byte; a box like this:
186c9083b85SXin LI
187c9083b85SXin LI         +==============+
188c9083b85SXin LI         |              |
189c9083b85SXin LI         +==============+
190c9083b85SXin LI
191c9083b85SXin LI      represents a variable number of bytes.
192c9083b85SXin LI
193c9083b85SXin LI      Bytes stored within a computer do not have a "bit order", since
194c9083b85SXin LI      they are always treated as a unit.  However, a byte considered as
195c9083b85SXin LI      an integer between 0 and 255 does have a most- and least-
196c9083b85SXin LI      significant bit, and since we write numbers with the most-
197c9083b85SXin LI      significant digit on the left, we also write bytes with the most-
198c9083b85SXin LI      significant bit on the left.  In the diagrams below, we number the
199c9083b85SXin LI      bits of a byte so that bit 0 is the least-significant bit, i.e.,
200c9083b85SXin LI      the bits are numbered:
201c9083b85SXin LI
202c9083b85SXin LI         +--------+
203c9083b85SXin LI         |76543210|
204c9083b85SXin LI         +--------+
205c9083b85SXin LI
206c9083b85SXin LI      This document does not address the issue of the order in which
207c9083b85SXin LI      bits of a byte are transmitted on a bit-sequential medium, since
208c9083b85SXin LI      the data format described here is byte- rather than bit-oriented.
209c9083b85SXin LI
210c9083b85SXin LI      Within a computer, a number may occupy multiple bytes.  All
211c9083b85SXin LI      multi-byte numbers in the format described here are stored with
212c9083b85SXin LI      the least-significant byte first (at the lower memory address).
213c9083b85SXin LI      For example, the decimal number 520 is stored as:
214c9083b85SXin LI
215c9083b85SXin LI             0        1
216c9083b85SXin LI         +--------+--------+
217c9083b85SXin LI         |00001000|00000010|
218c9083b85SXin LI         +--------+--------+
219c9083b85SXin LI          ^        ^
220c9083b85SXin LI          |        |
221c9083b85SXin LI          |        + more significant byte = 2 x 256
222c9083b85SXin LI          + less significant byte = 8
223c9083b85SXin LI
224c9083b85SXin LI
225c9083b85SXin LI
226c9083b85SXin LIDeutsch                      Informational                      [Page 4]
227c9083b85SXin LI
228c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
229c9083b85SXin LI
230c9083b85SXin LI
231c9083b85SXin LI   2.2. File format
232c9083b85SXin LI
233c9083b85SXin LI      A gzip file consists of a series of "members" (compressed data
234c9083b85SXin LI      sets).  The format of each member is specified in the following
235c9083b85SXin LI      section.  The members simply appear one after another in the file,
236c9083b85SXin LI      with no additional information before, between, or after them.
237c9083b85SXin LI
238c9083b85SXin LI   2.3. Member format
239c9083b85SXin LI
240c9083b85SXin LI      Each member has the following structure:
241c9083b85SXin LI
242c9083b85SXin LI         +---+---+---+---+---+---+---+---+---+---+
243c9083b85SXin LI         |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
244c9083b85SXin LI         +---+---+---+---+---+---+---+---+---+---+
245c9083b85SXin LI
246c9083b85SXin LI      (if FLG.FEXTRA set)
247c9083b85SXin LI
248c9083b85SXin LI         +---+---+=================================+
249c9083b85SXin LI         | XLEN  |...XLEN bytes of "extra field"...| (more-->)
250c9083b85SXin LI         +---+---+=================================+
251c9083b85SXin LI
252c9083b85SXin LI      (if FLG.FNAME set)
253c9083b85SXin LI
254c9083b85SXin LI         +=========================================+
255c9083b85SXin LI         |...original file name, zero-terminated...| (more-->)
256c9083b85SXin LI         +=========================================+
257c9083b85SXin LI
258c9083b85SXin LI      (if FLG.FCOMMENT set)
259c9083b85SXin LI
260c9083b85SXin LI         +===================================+
261c9083b85SXin LI         |...file comment, zero-terminated...| (more-->)
262c9083b85SXin LI         +===================================+
263c9083b85SXin LI
264c9083b85SXin LI      (if FLG.FHCRC set)
265c9083b85SXin LI
266c9083b85SXin LI         +---+---+
267c9083b85SXin LI         | CRC16 |
268c9083b85SXin LI         +---+---+
269c9083b85SXin LI
270c9083b85SXin LI         +=======================+
271c9083b85SXin LI         |...compressed blocks...| (more-->)
272c9083b85SXin LI         +=======================+
273c9083b85SXin LI
274c9083b85SXin LI           0   1   2   3   4   5   6   7
275c9083b85SXin LI         +---+---+---+---+---+---+---+---+
276c9083b85SXin LI         |     CRC32     |     ISIZE     |
277c9083b85SXin LI         +---+---+---+---+---+---+---+---+
278c9083b85SXin LI
279c9083b85SXin LI
280c9083b85SXin LI
281c9083b85SXin LI
282c9083b85SXin LIDeutsch                      Informational                      [Page 5]
283c9083b85SXin LI
284c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
285c9083b85SXin LI
286c9083b85SXin LI
287c9083b85SXin LI      2.3.1. Member header and trailer
288c9083b85SXin LI
289c9083b85SXin LI         ID1 (IDentification 1)
290c9083b85SXin LI         ID2 (IDentification 2)
291c9083b85SXin LI            These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
292c9083b85SXin LI            (0x8b, \213), to identify the file as being in gzip format.
293c9083b85SXin LI
294c9083b85SXin LI         CM (Compression Method)
295c9083b85SXin LI            This identifies the compression method used in the file.  CM
296c9083b85SXin LI            = 0-7 are reserved.  CM = 8 denotes the "deflate"
297c9083b85SXin LI            compression method, which is the one customarily used by
298c9083b85SXin LI            gzip and which is documented elsewhere.
299c9083b85SXin LI
300c9083b85SXin LI         FLG (FLaGs)
301c9083b85SXin LI            This flag byte is divided into individual bits as follows:
302c9083b85SXin LI
303c9083b85SXin LI               bit 0   FTEXT
304c9083b85SXin LI               bit 1   FHCRC
305c9083b85SXin LI               bit 2   FEXTRA
306c9083b85SXin LI               bit 3   FNAME
307c9083b85SXin LI               bit 4   FCOMMENT
308c9083b85SXin LI               bit 5   reserved
309c9083b85SXin LI               bit 6   reserved
310c9083b85SXin LI               bit 7   reserved
311c9083b85SXin LI
312c9083b85SXin LI            If FTEXT is set, the file is probably ASCII text.  This is
313c9083b85SXin LI            an optional indication, which the compressor may set by
314c9083b85SXin LI            checking a small amount of the input data to see whether any
315c9083b85SXin LI            non-ASCII characters are present.  In case of doubt, FTEXT
316c9083b85SXin LI            is cleared, indicating binary data. For systems which have
317c9083b85SXin LI            different file formats for ASCII text and binary data, the
318c9083b85SXin LI            decompressor can use FTEXT to choose the appropriate format.
319c9083b85SXin LI            We deliberately do not specify the algorithm used to set
320c9083b85SXin LI            this bit, since a compressor always has the option of
321c9083b85SXin LI            leaving it cleared and a decompressor always has the option
322c9083b85SXin LI            of ignoring it and letting some other program handle issues
323c9083b85SXin LI            of data conversion.
324c9083b85SXin LI
325c9083b85SXin LI            If FHCRC is set, a CRC16 for the gzip header is present,
326c9083b85SXin LI            immediately before the compressed data. The CRC16 consists
327c9083b85SXin LI            of the two least significant bytes of the CRC32 for all
328c9083b85SXin LI            bytes of the gzip header up to and not including the CRC16.
329c9083b85SXin LI            [The FHCRC bit was never set by versions of gzip up to
330c9083b85SXin LI            1.2.4, even though it was documented with a different
331c9083b85SXin LI            meaning in gzip 1.2.4.]
332c9083b85SXin LI
333c9083b85SXin LI            If FEXTRA is set, optional extra fields are present, as
334c9083b85SXin LI            described in a following section.
335c9083b85SXin LI
336c9083b85SXin LI
337c9083b85SXin LI
338c9083b85SXin LIDeutsch                      Informational                      [Page 6]
339c9083b85SXin LI
340c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
341c9083b85SXin LI
342c9083b85SXin LI
343c9083b85SXin LI            If FNAME is set, an original file name is present,
344c9083b85SXin LI            terminated by a zero byte.  The name must consist of ISO
345c9083b85SXin LI            8859-1 (LATIN-1) characters; on operating systems using
346c9083b85SXin LI            EBCDIC or any other character set for file names, the name
347c9083b85SXin LI            must be translated to the ISO LATIN-1 character set.  This
348c9083b85SXin LI            is the original name of the file being compressed, with any
349c9083b85SXin LI            directory components removed, and, if the file being
350c9083b85SXin LI            compressed is on a file system with case insensitive names,
351c9083b85SXin LI            forced to lower case. There is no original file name if the
352c9083b85SXin LI            data was compressed from a source other than a named file;
353c9083b85SXin LI            for example, if the source was stdin on a Unix system, there
354c9083b85SXin LI            is no file name.
355c9083b85SXin LI
356c9083b85SXin LI            If FCOMMENT is set, a zero-terminated file comment is
357c9083b85SXin LI            present.  This comment is not interpreted; it is only
358c9083b85SXin LI            intended for human consumption.  The comment must consist of
359c9083b85SXin LI            ISO 8859-1 (LATIN-1) characters.  Line breaks should be
360c9083b85SXin LI            denoted by a single line feed character (10 decimal).
361c9083b85SXin LI
362c9083b85SXin LI            Reserved FLG bits must be zero.
363c9083b85SXin LI
364c9083b85SXin LI         MTIME (Modification TIME)
365c9083b85SXin LI            This gives the most recent modification time of the original
366c9083b85SXin LI            file being compressed.  The time is in Unix format, i.e.,
367c9083b85SXin LI            seconds since 00:00:00 GMT, Jan.  1, 1970.  (Note that this
368c9083b85SXin LI            may cause problems for MS-DOS and other systems that use
369c9083b85SXin LI            local rather than Universal time.)  If the compressed data
370c9083b85SXin LI            did not come from a file, MTIME is set to the time at which
371c9083b85SXin LI            compression started.  MTIME = 0 means no time stamp is
372c9083b85SXin LI            available.
373c9083b85SXin LI
374c9083b85SXin LI         XFL (eXtra FLags)
375c9083b85SXin LI            These flags are available for use by specific compression
376c9083b85SXin LI            methods.  The "deflate" method (CM = 8) sets these flags as
377c9083b85SXin LI            follows:
378c9083b85SXin LI
379c9083b85SXin LI               XFL = 2 - compressor used maximum compression,
380c9083b85SXin LI                         slowest algorithm
381c9083b85SXin LI               XFL = 4 - compressor used fastest algorithm
382c9083b85SXin LI
383c9083b85SXin LI         OS (Operating System)
384c9083b85SXin LI            This identifies the type of file system on which compression
385c9083b85SXin LI            took place.  This may be useful in determining end-of-line
386c9083b85SXin LI            convention for text files.  The currently defined values are
387c9083b85SXin LI            as follows:
388c9083b85SXin LI
389c9083b85SXin LI
390c9083b85SXin LI
391c9083b85SXin LI
392c9083b85SXin LI
393c9083b85SXin LI
394c9083b85SXin LIDeutsch                      Informational                      [Page 7]
395c9083b85SXin LI
396c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
397c9083b85SXin LI
398c9083b85SXin LI
399c9083b85SXin LI                 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
400c9083b85SXin LI                 1 - Amiga
401c9083b85SXin LI                 2 - VMS (or OpenVMS)
402c9083b85SXin LI                 3 - Unix
403c9083b85SXin LI                 4 - VM/CMS
404c9083b85SXin LI                 5 - Atari TOS
405c9083b85SXin LI                 6 - HPFS filesystem (OS/2, NT)
406c9083b85SXin LI                 7 - Macintosh
407c9083b85SXin LI                 8 - Z-System
408c9083b85SXin LI                 9 - CP/M
409c9083b85SXin LI                10 - TOPS-20
410c9083b85SXin LI                11 - NTFS filesystem (NT)
411c9083b85SXin LI                12 - QDOS
412c9083b85SXin LI                13 - Acorn RISCOS
413c9083b85SXin LI               255 - unknown
414c9083b85SXin LI
415c9083b85SXin LI         XLEN (eXtra LENgth)
416c9083b85SXin LI            If FLG.FEXTRA is set, this gives the length of the optional
417c9083b85SXin LI            extra field.  See below for details.
418c9083b85SXin LI
419c9083b85SXin LI         CRC32 (CRC-32)
420c9083b85SXin LI            This contains a Cyclic Redundancy Check value of the
421c9083b85SXin LI            uncompressed data computed according to the CRC-32 algorithm
422c9083b85SXin LI            used in the ISO 3309 standard and in section 8.1.1.6.2 of
423c9083b85SXin LI            ITU-T recommendation V.42.  (See http://www.iso.ch for
424c9083b85SXin LI            ordering ISO documents. See gopher://info.itu.ch for an
425c9083b85SXin LI            online version of ITU-T V.42.)
426c9083b85SXin LI
427c9083b85SXin LI         ISIZE (Input SIZE)
428c9083b85SXin LI            This contains the size of the original (uncompressed) input
429c9083b85SXin LI            data modulo 2^32.
430c9083b85SXin LI
431c9083b85SXin LI      2.3.1.1. Extra field
432c9083b85SXin LI
433c9083b85SXin LI         If the FLG.FEXTRA bit is set, an "extra field" is present in
434c9083b85SXin LI         the header, with total length XLEN bytes.  It consists of a
435c9083b85SXin LI         series of subfields, each of the form:
436c9083b85SXin LI
437c9083b85SXin LI            +---+---+---+---+==================================+
438c9083b85SXin LI            |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
439c9083b85SXin LI            +---+---+---+---+==================================+
440c9083b85SXin LI
441c9083b85SXin LI         SI1 and SI2 provide a subfield ID, typically two ASCII letters
442c9083b85SXin LI         with some mnemonic value.  Jean-Loup Gailly
443c9083b85SXin LI         <gzip@prep.ai.mit.edu> is maintaining a registry of subfield
444c9083b85SXin LI         IDs; please send him any subfield ID you wish to use.  Subfield
445c9083b85SXin LI         IDs with SI2 = 0 are reserved for future use.  The following
446c9083b85SXin LI         IDs are currently defined:
447c9083b85SXin LI
448c9083b85SXin LI
449c9083b85SXin LI
450c9083b85SXin LIDeutsch                      Informational                      [Page 8]
451c9083b85SXin LI
452c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
453c9083b85SXin LI
454c9083b85SXin LI
455c9083b85SXin LI            SI1         SI2         Data
456c9083b85SXin LI            ----------  ----------  ----
457c9083b85SXin LI            0x41 ('A')  0x70 ('p')  Apollo file type information
458c9083b85SXin LI
459c9083b85SXin LI         LEN gives the length of the subfield data, excluding the 4
460c9083b85SXin LI         initial bytes.
461c9083b85SXin LI
462c9083b85SXin LI      2.3.1.2. Compliance
463c9083b85SXin LI
464c9083b85SXin LI         A compliant compressor must produce files with correct ID1,
465c9083b85SXin LI         ID2, CM, CRC32, and ISIZE, but may set all the other fields in
466c9083b85SXin LI         the fixed-length part of the header to default values (255 for
467c9083b85SXin LI         OS, 0 for all others).  The compressor must set all reserved
468c9083b85SXin LI         bits to zero.
469c9083b85SXin LI
470c9083b85SXin LI         A compliant decompressor must check ID1, ID2, and CM, and
471c9083b85SXin LI         provide an error indication if any of these have incorrect
472c9083b85SXin LI         values.  It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
473c9083b85SXin LI         at least so it can skip over the optional fields if they are
474c9083b85SXin LI         present.  It need not examine any other part of the header or
475c9083b85SXin LI         trailer; in particular, a decompressor may ignore FTEXT and OS
476c9083b85SXin LI         and always produce binary output, and still be compliant.  A
477c9083b85SXin LI         compliant decompressor must give an error indication if any
478c9083b85SXin LI         reserved bit is non-zero, since such a bit could indicate the
479c9083b85SXin LI         presence of a new field that would cause subsequent data to be
480c9083b85SXin LI         interpreted incorrectly.
481c9083b85SXin LI
482c9083b85SXin LI3. References
483c9083b85SXin LI
484c9083b85SXin LI   [1] "Information Processing - 8-bit single-byte coded graphic
485c9083b85SXin LI       character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
486c9083b85SXin LI       The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
487c9083b85SXin LI       ASCII. Files defining this character set are available as
488c9083b85SXin LI       iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
489c9083b85SXin LI
490c9083b85SXin LI   [2] ISO 3309
491c9083b85SXin LI
492c9083b85SXin LI   [3] ITU-T recommendation V.42
493c9083b85SXin LI
494c9083b85SXin LI   [4] Deutsch, L.P., "DEFLATE Compressed Data Format Specification",
495c9083b85SXin LI       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
496c9083b85SXin LI
497c9083b85SXin LI   [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
498c9083b85SXin LI       ftp://prep.ai.mit.edu/pub/gnu/
499c9083b85SXin LI
500c9083b85SXin LI   [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
501c9083b85SXin LI       Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
502c9083b85SXin LI
503c9083b85SXin LI
504c9083b85SXin LI
505c9083b85SXin LI
506c9083b85SXin LIDeutsch                      Informational                      [Page 9]
507c9083b85SXin LI
508c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
509c9083b85SXin LI
510c9083b85SXin LI
511c9083b85SXin LI   [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
512c9083b85SXin LI       pp.118-133.
513c9083b85SXin LI
514c9083b85SXin LI   [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
515c9083b85SXin LI       describing the CRC concept.
516c9083b85SXin LI
517c9083b85SXin LI4. Security Considerations
518c9083b85SXin LI
519c9083b85SXin LI   Any data compression method involves the reduction of redundancy in
520c9083b85SXin LI   the data.  Consequently, any corruption of the data is likely to have
521c9083b85SXin LI   severe effects and be difficult to correct.  Uncompressed text, on
522c9083b85SXin LI   the other hand, will probably still be readable despite the presence
523c9083b85SXin LI   of some corrupted bytes.
524c9083b85SXin LI
525c9083b85SXin LI   It is recommended that systems using this data format provide some
526c9083b85SXin LI   means of validating the integrity of the compressed data, such as by
527c9083b85SXin LI   setting and checking the CRC-32 check value.
528c9083b85SXin LI
529c9083b85SXin LI5. Acknowledgements
530c9083b85SXin LI
531c9083b85SXin LI   Trademarks cited in this document are the property of their
532c9083b85SXin LI   respective owners.
533c9083b85SXin LI
534c9083b85SXin LI   Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
535c9083b85SXin LI   the related software described in this specification.  Glenn
536c9083b85SXin LI   Randers-Pehrson converted this document to RFC and HTML format.
537c9083b85SXin LI
538c9083b85SXin LI6. Author's Address
539c9083b85SXin LI
540c9083b85SXin LI   L. Peter Deutsch
541c9083b85SXin LI   Aladdin Enterprises
542c9083b85SXin LI   203 Santa Margarita Ave.
543c9083b85SXin LI   Menlo Park, CA 94025
544c9083b85SXin LI
545c9083b85SXin LI   Phone: (415) 322-0103 (AM only)
546c9083b85SXin LI   FAX:   (415) 322-1734
547c9083b85SXin LI   EMail: <ghost@aladdin.com>
548c9083b85SXin LI
549c9083b85SXin LI   Questions about the technical content of this specification can be
550c9083b85SXin LI   sent by email to:
551c9083b85SXin LI
552c9083b85SXin LI   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
553c9083b85SXin LI   Mark Adler <madler@alumni.caltech.edu>
554c9083b85SXin LI
555c9083b85SXin LI   Editorial comments on this specification can be sent by email to:
556c9083b85SXin LI
557c9083b85SXin LI   L. Peter Deutsch <ghost@aladdin.com> and
558c9083b85SXin LI   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
559c9083b85SXin LI
560c9083b85SXin LI
561c9083b85SXin LI
562c9083b85SXin LIDeutsch                      Informational                     [Page 10]
563c9083b85SXin LI
564c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
565c9083b85SXin LI
566c9083b85SXin LI
567c9083b85SXin LI7. Appendix: Jean-Loup Gailly's gzip utility
568c9083b85SXin LI
569c9083b85SXin LI   The most widely used implementation of gzip compression, and the
570c9083b85SXin LI   original documentation on which this specification is based, were
571c9083b85SXin LI   created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>.  Since this
572c9083b85SXin LI   implementation is a de facto standard, we mention some more of its
573c9083b85SXin LI   features here.  Again, the material in this section is not part of
574c9083b85SXin LI   the specification per se, and implementations need not follow it to
575c9083b85SXin LI   be compliant.
576c9083b85SXin LI
577c9083b85SXin LI   When compressing or decompressing a file, gzip preserves the
578c9083b85SXin LI   protection, ownership, and modification time attributes on the local
579c9083b85SXin LI   file system, since there is no provision for representing protection
580c9083b85SXin LI   attributes in the gzip file format itself.  Since the file format
581c9083b85SXin LI   includes a modification time, the gzip decompressor provides a
582c9083b85SXin LI   command-line switch that assigns the modification time stored in the
583c9083b85SXin LI   gzip header, rather than the local modification time of the
584c9083b85SXin LI   compressed input, to the decompressed output.
585c9083b85SXin LI
586c9083b85SXin LI8. Appendix: Sample CRC Code
587c9083b85SXin LI
588c9083b85SXin LI   The following sample code represents a practical implementation of
589c9083b85SXin LI   the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
590c9083b85SXin LI   for a formal specification.)
591c9083b85SXin LI
592c9083b85SXin LI   The sample code is in the ANSI C programming language. Non-C users
593c9083b85SXin LI   may find it easier to read with these hints:
594c9083b85SXin LI
595c9083b85SXin LI      &      Bitwise AND operator.
596c9083b85SXin LI      ^      Bitwise exclusive-OR operator.
597c9083b85SXin LI      >>     Bitwise right shift operator. When applied to an
598c9083b85SXin LI             unsigned quantity, as here, right shift inserts zero
599c9083b85SXin LI             bit(s) at the left.
600c9083b85SXin LI      !      Logical NOT operator.
601c9083b85SXin LI      ++     "n++" increments the variable n.
602c9083b85SXin LI      0xNNN  0x introduces a hexadecimal (base 16) constant.
603c9083b85SXin LI             Suffix L indicates a long value (at least 32 bits).
604c9083b85SXin LI
605c9083b85SXin LI      /* Table of CRCs of all 8-bit messages. */
606c9083b85SXin LI      unsigned long crc_table[256];
607c9083b85SXin LI
608c9083b85SXin LI      /* Flag: has the table been computed? Initially false. */
609c9083b85SXin LI      int crc_table_computed = 0;
610c9083b85SXin LI
611c9083b85SXin LI      /* Make the table for a fast CRC. */
612c9083b85SXin LI      void make_crc_table(void)
613c9083b85SXin LI      {
614c9083b85SXin LI        unsigned long c;
615c9083b85SXin LI
616c9083b85SXin LI
617c9083b85SXin LI
618c9083b85SXin LIDeutsch                      Informational                     [Page 11]
619c9083b85SXin LI
620c9083b85SXin LIRFC 1952             GZIP File Format Specification             May 1996
621c9083b85SXin LI
622c9083b85SXin LI
623c9083b85SXin LI        int n, k;
624c9083b85SXin LI        for (n = 0; n < 256; n++) {
625c9083b85SXin LI          c = (unsigned long) n;
626c9083b85SXin LI          for (k = 0; k < 8; k++) {
627c9083b85SXin LI            if (c & 1) {
628c9083b85SXin LI              c = 0xedb88320L ^ (c >> 1);
629c9083b85SXin LI            } else {
630c9083b85SXin LI              c = c >> 1;
631c9083b85SXin LI            }
632c9083b85SXin LI          }
633c9083b85SXin LI          crc_table[n] = c;
634c9083b85SXin LI        }
635c9083b85SXin LI        crc_table_computed = 1;
636c9083b85SXin LI      }
637c9083b85SXin LI
638c9083b85SXin LI      /*
639c9083b85SXin LI         Update a running crc with the bytes buf[0..len-1] and return
640c9083b85SXin LI       the updated crc. The crc should be initialized to zero. Pre- and
641c9083b85SXin LI       post-conditioning (one's complement) is performed within this
642c9083b85SXin LI       function so it shouldn't be done by the caller. Usage example:
643c9083b85SXin LI
644c9083b85SXin LI         unsigned long crc = 0L;
645c9083b85SXin LI
646c9083b85SXin LI         while (read_buffer(buffer, length) != EOF) {
647c9083b85SXin LI           crc = update_crc(crc, buffer, length);
648c9083b85SXin LI         }
649c9083b85SXin LI         if (crc != original_crc) error();
650c9083b85SXin LI      */
651c9083b85SXin LI      unsigned long update_crc(unsigned long crc,
652c9083b85SXin LI                      unsigned char *buf, int len)
653c9083b85SXin LI      {
654c9083b85SXin LI        unsigned long c = crc ^ 0xffffffffL;
655c9083b85SXin LI        int n;
656c9083b85SXin LI
657c9083b85SXin LI        if (!crc_table_computed)
658c9083b85SXin LI          make_crc_table();
659c9083b85SXin LI        for (n = 0; n < len; n++) {
660c9083b85SXin LI          c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
661c9083b85SXin LI        }
662c9083b85SXin LI        return c ^ 0xffffffffL;
663c9083b85SXin LI      }
664c9083b85SXin LI
665c9083b85SXin LI      /* Return the CRC of the bytes buf[0..len-1]. */
666c9083b85SXin LI      unsigned long crc(unsigned char *buf, int len)
667c9083b85SXin LI      {
668c9083b85SXin LI        return update_crc(0L, buf, len);
669c9083b85SXin LI      }
670c9083b85SXin LI
671c9083b85SXin LI
672c9083b85SXin LI
673c9083b85SXin LI
674c9083b85SXin LIDeutsch                      Informational                     [Page 12]
675c9083b85SXin LI
676