1e25fdb51Smrg /* pigz.c -- parallel implementation of gzip
2cf2fd8adStls * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Mark Adler
3cf2fd8adStls * Version 2.3.1 9 Oct 2013 Mark Adler
4e25fdb51Smrg */
5e25fdb51Smrg
6e25fdb51Smrg /*
7e25fdb51Smrg This software is provided 'as-is', without any express or implied
8e25fdb51Smrg warranty. In no event will the author be held liable for any damages
9e25fdb51Smrg arising from the use of this software.
10e25fdb51Smrg
11e25fdb51Smrg Permission is granted to anyone to use this software for any purpose,
12e25fdb51Smrg including commercial applications, and to alter it and redistribute it
13e25fdb51Smrg freely, subject to the following restrictions:
14e25fdb51Smrg
15e25fdb51Smrg 1. The origin of this software must not be misrepresented; you must not
16e25fdb51Smrg claim that you wrote the original software. If you use this software
17e25fdb51Smrg in a product, an acknowledgment in the product documentation would be
18e25fdb51Smrg appreciated but is not required.
19e25fdb51Smrg 2. Altered source versions must be plainly marked as such, and must not be
20e25fdb51Smrg misrepresented as being the original software.
21e25fdb51Smrg 3. This notice may not be removed or altered from any source distribution.
22e25fdb51Smrg
23e25fdb51Smrg Mark Adler
24e25fdb51Smrg madler@alumni.caltech.edu
25e25fdb51Smrg
26e25fdb51Smrg Mark accepts donations for providing this software. Donations are not
27e25fdb51Smrg required or expected. Any amount that you feel is appropriate would be
28e25fdb51Smrg appreciated. You can use this link:
29e25fdb51Smrg
30e25fdb51Smrg https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055
31e25fdb51Smrg
32e25fdb51Smrg */
33e25fdb51Smrg
34e25fdb51Smrg /* Version history:
35e25fdb51Smrg 1.0 17 Jan 2007 First version, pipe only
36e25fdb51Smrg 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that)
37e25fdb51Smrg Add note about requiring zlib 1.2.3
38e25fdb51Smrg Allow compression level 0 (no compression)
39e25fdb51Smrg Completely rewrite parallelism -- add a write thread
40e25fdb51Smrg Use deflateSetDictionary() to make use of history
41e25fdb51Smrg Tune argument defaults to best performance on four cores
42e25fdb51Smrg 1.2.1 1 Feb 2007 Add long command line options, add all gzip options
43e25fdb51Smrg Add debugging options
44e25fdb51Smrg 1.2.2 19 Feb 2007 Add list (--list) function
45e25fdb51Smrg Process file names on command line, write .gz output
46e25fdb51Smrg Write name and time in gzip header, set output file time
47e25fdb51Smrg Implement all command line options except --recursive
48e25fdb51Smrg Add --keep option to prevent deleting input files
49e25fdb51Smrg Add thread tracing information with -vv used
50cf2fd8adStls Copy crc32_combine() from zlib (shared libraries issue)
51e25fdb51Smrg 1.3 25 Feb 2007 Implement --recursive
52e25fdb51Smrg Expand help to show all options
53e25fdb51Smrg Show help if no arguments or output piping are provided
54e25fdb51Smrg Process options in GZIP environment variable
55e25fdb51Smrg Add progress indicator to write thread if --verbose
56e25fdb51Smrg 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery
57e25fdb51Smrg Reallocate jobs for new --blocksize or --processes
58e25fdb51Smrg Do not delete original if writing to stdout
59e25fdb51Smrg Allow --processes 1, which does no threading
60e25fdb51Smrg Add NOTHREAD define to compile without threads
61e25fdb51Smrg Incorporate license text from zlib in source code
62e25fdb51Smrg 1.5 25 Mar 2007 Reinitialize jobs for new compression level
63e25fdb51Smrg Copy attributes and owner from input file to output file
64e25fdb51Smrg Add decompression and testing
65e25fdb51Smrg Add -lt (or -ltv) to show all entries and proper lengths
66e25fdb51Smrg Add decompression, testing, listing of LZW (.Z) files
67e25fdb51Smrg Only generate and show trace log if DEBUG defined
68e25fdb51Smrg Take "-" argument to mean read file from stdin
69e25fdb51Smrg 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and decompression
70e25fdb51Smrg 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated)
71e25fdb51Smrg Avoid empty deflate blocks at end of deflate stream
72e25fdb51Smrg Show zlib check value (Adler-32) when listing
73e25fdb51Smrg Don't complain when decompressing empty file
74e25fdb51Smrg Warn about trailing junk for gzip and zlib streams
75e25fdb51Smrg Make listings consistent, ignore gzip extra flags
76e25fdb51Smrg Add zip stream compression (--zip)
77e25fdb51Smrg 1.8 13 May 2007 Document --zip option in help output
78e25fdb51Smrg 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization
79e25fdb51Smrg Use polling threads and a pool of memory buffers
80e25fdb51Smrg Remove direct pthread library use, hide in yarn.c
81e25fdb51Smrg 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3
82e25fdb51Smrg 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads
83e25fdb51Smrg Remove spurious use of ctime_r(), ctime() more portable
84e25fdb51Smrg Change application of job->calc lock to be a semaphore
85e25fdb51Smrg Detect size of off_t at run time to select %lu vs. %llu
86e25fdb51Smrg #define large file support macro even if not __linux__
87e25fdb51Smrg Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough
88e25fdb51Smrg Detect file-too-large error and report, blame build
89e25fdb51Smrg Replace check combination routines with those from zlib
90e25fdb51Smrg 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks
91e25fdb51Smrg Update for yarn 1.1 (yarn_prefix and yarn_abort)
92e25fdb51Smrg 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems
93e25fdb51Smrg 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz
94e25fdb51Smrg 2.1.4 9 Nov 2008 Fix bug when decompressing very short files
95e25fdb51Smrg 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement
96e25fdb51Smrg Allow numeric parameter immediately after -p or -b
97e25fdb51Smrg Enforce parameter after -p, -b, -s, before other options
98e25fdb51Smrg Enforce numeric parameters to have only numeric digits
99e25fdb51Smrg Try to determine the number of processors for -p default
100e25fdb51Smrg Fix --suffix short option to be -S to match gzip [Bloch]
101e25fdb51Smrg Decompress if executable named "unpigz" [Amundsen]
102e25fdb51Smrg Add a little bit of testing to Makefile
103e25fdb51Smrg 2.1.6 17 Jan 2010 Added pigz.spec to distribution for RPM systems [Brown]
104e25fdb51Smrg Avoid some compiler warnings
105e25fdb51Smrg Process symbolic links if piping to stdout [Hoffstätte]
106e25fdb51Smrg Decompress if executable named "gunzip" [Hoffstätte]
107e25fdb51Smrg Allow ".tgz" suffix [Chernookiy]
108e25fdb51Smrg Fix adler32 comparison on .zz files
109cf2fd8adStls 2.1.7 17 Dec 2011 Avoid unused parameter warning in reenter()
110cf2fd8adStls Don't assume 2's complement ints in compress_thread()
111cf2fd8adStls Replicate gzip -cdf cat-like behavior
112cf2fd8adStls Replicate gzip -- option to suppress option decoding
113cf2fd8adStls Test output from make test instead of showing it
114cf2fd8adStls Updated pigz.spec to install unpigz, pigz.1 [Obermaier]
115cf2fd8adStls Add PIGZ environment variable [Mueller]
116cf2fd8adStls Replicate gzip suffix search when decoding or listing
117cf2fd8adStls Fix bug in load() to set in_left to zero on end of file
118cf2fd8adStls Do not check suffix when input file won't be modified
119cf2fd8adStls Decompress to stdout if name is "*cat" [Hayasaka]
120cf2fd8adStls Write data descriptor signature to be like Info-ZIP
121cf2fd8adStls Update and sort options list in help
122cf2fd8adStls Use CC variable for compiler in Makefile
123cf2fd8adStls Exit with code 2 if a warning has been issued
124cf2fd8adStls Fix thread synchronization problem when tracing
125cf2fd8adStls Change macro name MAX to MAX2 to avoid library conflicts
126cf2fd8adStls Determine number of processors on HP-UX [Lloyd]
127cf2fd8adStls 2.2 31 Dec 2011 Check for expansion bound busting (e.g. modified zlib)
128cf2fd8adStls Make the "threads" list head global variable volatile
129cf2fd8adStls Fix construction and printing of 32-bit check values
130cf2fd8adStls Add --rsyncable functionality
131cf2fd8adStls 2.2.1 1 Jan 2012 Fix bug in --rsyncable buffer management
132cf2fd8adStls 2.2.2 1 Jan 2012 Fix another bug in --rsyncable buffer management
133cf2fd8adStls 2.2.3 15 Jan 2012 Remove volatile in yarn.c
134cf2fd8adStls Reduce the number of input buffers
135cf2fd8adStls Change initial rsyncable hash to comparison value
136cf2fd8adStls Improve the efficiency of arriving at a byte boundary
137cf2fd8adStls Add thread portability #defines from yarn.c
138cf2fd8adStls Have rsyncable compression be independent of threading
139cf2fd8adStls Fix bug where constructed dictionaries not being used
140cf2fd8adStls 2.2.4 11 Mar 2012 Avoid some return value warnings
141cf2fd8adStls Improve the portability of printing the off_t type
142cf2fd8adStls Check for existence of compress binary before using
143cf2fd8adStls Update zlib version checking to 1.2.6 for new functions
144cf2fd8adStls Fix bug in zip (-K) output
145cf2fd8adStls Fix license in pigz.spec
146cf2fd8adStls Remove thread portability #defines in pigz.c
147cf2fd8adStls 2.2.5 28 Jul 2012 Avoid race condition in free_pool()
148cf2fd8adStls Change suffix to .tar when decompressing or listing .tgz
149cf2fd8adStls Print name of executable in error messages
150cf2fd8adStls Show help properly when the name is unpigz or gunzip
151cf2fd8adStls Fix permissions security problem before output is closed
152cf2fd8adStls 2.3 3 Mar 2013 Don't complain about missing suffix on stdout
153cf2fd8adStls Put all global variables in a structure for readability
154cf2fd8adStls Do not decompress concatenated zlib streams (just gzip)
155cf2fd8adStls Add option for compression level 11 to use zopfli
156cf2fd8adStls Fix handling of junk after compressed data
157cf2fd8adStls 2.3.1 9 Oct 2013 Fix builds of pigzt and pigzn to include zopfli
158cf2fd8adStls Add -lm, needed to link log function on some systems
159cf2fd8adStls Respect LDFLAGS in Makefile, use CFLAGS consistently
160cf2fd8adStls Add memory allocation tracking
161cf2fd8adStls Fix casting error in uncompressed length calculation
162cf2fd8adStls Update zopfli to Mar 10, 2013 Google state
163cf2fd8adStls Support zopfli in single thread case
164cf2fd8adStls Add -F, -I, -M, and -O options for zopfli tuning
165e25fdb51Smrg */
166e25fdb51Smrg
167cf2fd8adStls #define VERSION "pigz 2.3.1\n"
168e25fdb51Smrg
169e25fdb51Smrg /* To-do:
170e25fdb51Smrg - make source portable for Windows, VMS, etc. (see gzip source code)
171e25fdb51Smrg - make build portable (currently good for Unixish)
172e25fdb51Smrg */
173e25fdb51Smrg
174e25fdb51Smrg /*
175e25fdb51Smrg pigz compresses using threads to make use of multiple processors and cores.
176e25fdb51Smrg The input is broken up into 128 KB chunks with each compressed in parallel.
177e25fdb51Smrg The individual check value for each chunk is also calculated in parallel.
178e25fdb51Smrg The compressed data is written in order to the output, and a combined check
179e25fdb51Smrg value is calculated from the individual check values.
180e25fdb51Smrg
181e25fdb51Smrg The compressed data format generated is in the gzip, zlib, or single-entry
182e25fdb51Smrg zip format using the deflate compression method. The compression produces
183e25fdb51Smrg partial raw deflate streams which are concatenated by a single write thread
184e25fdb51Smrg and wrapped with the appropriate header and trailer, where the trailer
185e25fdb51Smrg contains the combined check value.
186e25fdb51Smrg
187e25fdb51Smrg Each partial raw deflate stream is terminated by an empty stored block
188e25fdb51Smrg (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit
189cf2fd8adStls stream at a byte boundary, unless that partial stream happens to already end
190cf2fd8adStls at a byte boundary (the latter requires zlib 1.2.6 or later). Ending on a
191cf2fd8adStls byte boundary allows the partial streams to be concatenated simply as
192cf2fd8adStls sequences of bytes. This adds a very small four to five byte overhead
193cf2fd8adStls (average 3.75 bytes) to the output for each input chunk.
194e25fdb51Smrg
195e25fdb51Smrg The default input block size is 128K, but can be changed with the -b option.
196e25fdb51Smrg The number of compress threads is set by default to 8, which can be changed
197e25fdb51Smrg using the -p option. Specifying -p 1 avoids the use of threads entirely.
198e25fdb51Smrg pigz will try to determine the number of processors in the machine, in which
199e25fdb51Smrg case if that number is two or greater, pigz will use that as the default for
200e25fdb51Smrg -p instead of 8.
201e25fdb51Smrg
202e25fdb51Smrg The input blocks, while compressed independently, have the last 32K of the
203e25fdb51Smrg previous block loaded as a preset dictionary to preserve the compression
204e25fdb51Smrg effectiveness of deflating in a single thread. This can be turned off using
205e25fdb51Smrg the --independent or -i option, so that the blocks can be decompressed
206e25fdb51Smrg independently for partial error recovery or for random access.
207e25fdb51Smrg
208e25fdb51Smrg Decompression can't be parallelized, at least not without specially prepared
209e25fdb51Smrg deflate streams for that purpose. As a result, pigz uses a single thread
210e25fdb51Smrg (the main thread) for decompression, but will create three other threads for
211e25fdb51Smrg reading, writing, and check calculation, which can speed up decompression
212e25fdb51Smrg under some circumstances. Parallel decompression can be turned off by
213e25fdb51Smrg specifying one process (-dp 1 or -tp 1).
214e25fdb51Smrg
215e25fdb51Smrg pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing
216e25fdb51Smrg raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib
217e25fdb51Smrg version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during
218e25fdb51Smrg the compilation of pigz.c. zlib 1.2.4 includes some improvements to
219e25fdb51Smrg Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for
220e25fdb51Smrg pigz with and without threads, which is not possible with zlib 1.2.3. This
221e25fdb51Smrg may be important for uses of pigz -R where small changes in the contents
222e25fdb51Smrg should result in small changes in the archive for rsync. Note that due to
223e25fdb51Smrg the details of how the lower levels of compression result in greater speed,
224e25fdb51Smrg compression level 3 and below does not permit identical pigz output with
225e25fdb51Smrg and without threads.
226e25fdb51Smrg
227e25fdb51Smrg pigz uses the POSIX pthread library for thread control and communication,
228e25fdb51Smrg through the yarn.h interface to yarn.c. yarn.c can be replaced with
229e25fdb51Smrg equivalent implementations using other thread libraries. pigz can be
230e25fdb51Smrg compiled with NOTHREAD #defined to not use threads at all (in which case
231e25fdb51Smrg pigz will not be able to live up to the "parallel" in its name).
232e25fdb51Smrg */
233e25fdb51Smrg
234e25fdb51Smrg /*
235e25fdb51Smrg Details of parallel compression implementation:
236e25fdb51Smrg
237e25fdb51Smrg When doing parallel compression, pigz uses the main thread to read the input
238e25fdb51Smrg in 'size' sized chunks (see -b), and puts those in a compression job list,
239e25fdb51Smrg each with a sequence number to keep track of the ordering. If it is not the
240e25fdb51Smrg first chunk, then that job also points to the previous input buffer, from
241e25fdb51Smrg which the last 32K will be used as a dictionary (unless -i is specified).
242e25fdb51Smrg This sets a lower limit of 32K on 'size'.
243e25fdb51Smrg
244e25fdb51Smrg pigz launches up to 'procs' compression threads (see -p). Each compression
245e25fdb51Smrg thread continues to look for jobs in the compression list and perform those
246e25fdb51Smrg jobs until instructed to return. When a job is pulled, the dictionary, if
247e25fdb51Smrg provided, will be loaded into the deflate engine and then that input buffer
248e25fdb51Smrg is dropped for reuse. Then the input data is compressed into an output
249cf2fd8adStls buffer that grows in size if necessary to hold the compressed data. The job
250cf2fd8adStls is then put into the write job list, sorted by the sequence number. The
251cf2fd8adStls compress thread however continues to calculate the check value on the input
252cf2fd8adStls data, either a CRC-32 or Adler-32, possibly in parallel with the write
253e25fdb51Smrg thread writing the output data. Once that's done, the compress thread drops
254e25fdb51Smrg the input buffer and also releases the lock on the check value so that the
255e25fdb51Smrg write thread can combine it with the previous check values. The compress
256e25fdb51Smrg thread has then completed that job, and goes to look for another.
257e25fdb51Smrg
258e25fdb51Smrg All of the compress threads are left running and waiting even after the last
259e25fdb51Smrg chunk is processed, so that they can support the next input to be compressed
260e25fdb51Smrg (more than one input file on the command line). Once pigz is done, it will
261e25fdb51Smrg call all the compress threads home (that'll do pig, that'll do).
262e25fdb51Smrg
263e25fdb51Smrg Before starting to read the input, the main thread launches the write thread
   so that it is ready to pick up jobs immediately. The compress thread puts the
265e25fdb51Smrg write jobs in the list in sequence sorted order, so that the first job in
   the list always has the lowest sequence number. The write thread waits
267e25fdb51Smrg for the next write job in sequence, and then gets that job. The job still
268e25fdb51Smrg holds its input buffer, from which the write thread gets the input buffer
269e25fdb51Smrg length for use in check value combination. Then the write thread drops that
270e25fdb51Smrg input buffer to allow its reuse. Holding on to the input buffer until the
271e25fdb51Smrg write thread starts also has the benefit that the read and compress threads
272e25fdb51Smrg can't get way ahead of the write thread and build up a large backlog of
273e25fdb51Smrg unwritten compressed data. The write thread will write the compressed data,
274e25fdb51Smrg drop the output buffer, and then wait for the check value to be unlocked
275e25fdb51Smrg by the compress thread. Then the write thread combines the check value for
276e25fdb51Smrg this chunk with the total check value for eventual use in the trailer. If
277e25fdb51Smrg this is not the last chunk, the write thread then goes back to look for the
278e25fdb51Smrg next output chunk in sequence. After the last chunk, the write thread
279e25fdb51Smrg returns and joins the main thread. Unlike the compress threads, a new write
280e25fdb51Smrg thread is launched for each input stream. The write thread writes the
281e25fdb51Smrg appropriate header and trailer around the compressed data.
282e25fdb51Smrg
283e25fdb51Smrg The input and output buffers are reused through their collection in pools.
284e25fdb51Smrg Each buffer has a use count, which when decremented to zero returns the
285e25fdb51Smrg buffer to the respective pool. Each input buffer has up to three parallel
286e25fdb51Smrg uses: as the input for compression, as the data for the check value
287e25fdb51Smrg calculation, and as a dictionary for compression. Each output buffer has
288e25fdb51Smrg only one use, which is as the output of compression followed serially as
289e25fdb51Smrg data to be written. The input pool is limited in the number of buffers, so
290e25fdb51Smrg that reading does not get way ahead of compression and eat up memory with
291e25fdb51Smrg more input than can be used. The limit is approximately two times the
292e25fdb51Smrg number of compression threads. In the case that reading is fast as compared
293e25fdb51Smrg to compression, that number allows a second set of buffers to be read while
294e25fdb51Smrg the first set of compressions are being performed. The number of output
295e25fdb51Smrg buffers is not directly limited, but is indirectly limited by the release of
296cf2fd8adStls input buffers to about the same number.
297e25fdb51Smrg */
298e25fdb51Smrg
299e25fdb51Smrg /* use large file functions if available */
300e25fdb51Smrg #define _FILE_OFFSET_BITS 64
301e25fdb51Smrg
302e25fdb51Smrg /* included headers and what is expected from each */
303e25fdb51Smrg #include <stdio.h> /* fflush(), fprintf(), fputs(), getchar(), putc(), */
304e25fdb51Smrg /* puts(), printf(), vasprintf(), stderr, EOF, NULL,
305e25fdb51Smrg SEEK_END, size_t, off_t */
306e25fdb51Smrg #include <stdlib.h> /* exit(), malloc(), free(), realloc(), atol(), */
307e25fdb51Smrg /* atoi(), getenv() */
308e25fdb51Smrg #include <stdarg.h> /* va_start(), va_end(), va_list */
309e25fdb51Smrg #include <string.h> /* memset(), memchr(), memcpy(), strcmp(), strcpy() */
310e25fdb51Smrg /* strncpy(), strlen(), strcat(), strrchr() */
311e25fdb51Smrg #include <errno.h> /* errno, EEXIST */
312e25fdb51Smrg #include <assert.h> /* assert() */
313e25fdb51Smrg #include <time.h> /* ctime(), time(), time_t, mktime() */
314e25fdb51Smrg #include <signal.h> /* signal(), SIGINT */
315e25fdb51Smrg #include <sys/types.h> /* ssize_t */
316e25fdb51Smrg #include <sys/stat.h> /* chmod(), stat(), fstat(), lstat(), struct stat, */
317e25fdb51Smrg /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */
318e25fdb51Smrg #include <sys/time.h> /* utimes(), gettimeofday(), struct timeval */
319e25fdb51Smrg #include <unistd.h> /* unlink(), _exit(), read(), write(), close(), */
320e25fdb51Smrg /* lseek(), isatty(), chown() */
321e25fdb51Smrg #include <fcntl.h> /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */
322e25fdb51Smrg /* O_WRONLY */
323e25fdb51Smrg #include <dirent.h> /* opendir(), readdir(), closedir(), DIR, */
324e25fdb51Smrg /* struct dirent */
325cf2fd8adStls #include <limits.h> /* PATH_MAX, UINT_MAX, INT_MAX */
326cf2fd8adStls #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
327cf2fd8adStls # include <inttypes.h> /* intmax_t */
328cf2fd8adStls #endif
329cf2fd8adStls
330cf2fd8adStls #ifdef DEBUG
331cf2fd8adStls # if defined(__APPLE__)
332cf2fd8adStls # include <malloc/malloc.h>
333cf2fd8adStls # define MALLOC_SIZE(p) malloc_size(p)
334cf2fd8adStls # elif defined (__linux)
335cf2fd8adStls # include <malloc.h>
336cf2fd8adStls # define MALLOC_SIZE(p) malloc_usable_size(p)
337cf2fd8adStls # elif defined (_WIN32) || defined(_WIN64)
338cf2fd8adStls # include <malloc.h>
339cf2fd8adStls # define MALLOC_SIZE(p) _msize(p)
340cf2fd8adStls # else
341cf2fd8adStls # define MALLOC_SIZE(p) (0)
342cf2fd8adStls # endif
343cf2fd8adStls #endif
344cf2fd8adStls
345cf2fd8adStls #ifdef __hpux
346cf2fd8adStls # include <sys/param.h>
347cf2fd8adStls # include <sys/pstat.h>
348cf2fd8adStls #endif
349e25fdb51Smrg
350e25fdb51Smrg #include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */
351e25fdb51Smrg /* deflateEnd(), deflateSetDictionary(), crc32(),
352e25fdb51Smrg inflateBackInit(), inflateBack(), inflateBackEnd(),
353e25fdb51Smrg Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY,
354e25fdb51Smrg Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK,
355e25fdb51Smrg Z_SYNC_FLUSH, z_stream */
356e25fdb51Smrg #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230
357e25fdb51Smrg # error Need zlib version 1.2.3 or later
358e25fdb51Smrg #endif
359e25fdb51Smrg
360e25fdb51Smrg #ifndef NOTHREAD
361e25fdb51Smrg # include "yarn.h" /* thread, launch(), join(), join_all(), */
362e25fdb51Smrg /* lock, new_lock(), possess(), twist(), wait_for(),
363e25fdb51Smrg release(), peek_lock(), free_lock(), yarn_name */
364e25fdb51Smrg #endif
365cf2fd8adStls #include "zopfli/deflate.h" /* ZopfliDeflatePart(), ZopfliInitOptions(),
366cf2fd8adStls ZopfliOptions */
367e25fdb51Smrg
368e25fdb51Smrg /* for local functions and globals */
369e25fdb51Smrg #define local static
370e25fdb51Smrg
371e25fdb51Smrg /* prevent end-of-line conversions on MSDOSish operating systems */
372e25fdb51Smrg #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
373e25fdb51Smrg # include <io.h> /* setmode(), O_BINARY */
374e25fdb51Smrg # define SET_BINARY_MODE(fd) setmode(fd, O_BINARY)
375e25fdb51Smrg #else
376e25fdb51Smrg # define SET_BINARY_MODE(fd)
377e25fdb51Smrg #endif
378e25fdb51Smrg
/* Free the allocation that ptr refers to, if there is one, and then set ptr
   to NULL to mark it as unallocated.  ptr must be a modifiable pointer lvalue;
   it is evaluated more than once, so it should not have side effects.  The
   argument is parenthesized wherever this macro itself uses it.  FREE() is
   not defined in this part of the file -- it must be defined by the time
   RELEASE() is expanded. */
#define RELEASE(ptr) \
    do { \
        if ((ptr) != NULL) { \
            FREE(ptr); \
            (ptr) = NULL; \
        } \
    } while (0)
387e25fdb51Smrg
388cf2fd8adStls /* sliding dictionary size for deflate */
389cf2fd8adStls #define DICT 32768U
390cf2fd8adStls
391cf2fd8adStls /* largest power of 2 that fits in an unsigned int -- used to limit requests
392cf2fd8adStls to zlib functions that use unsigned int lengths */
393cf2fd8adStls #define MAXP2 (UINT_MAX - (UINT_MAX >> 1))
394cf2fd8adStls
395cf2fd8adStls /* rsyncable constants -- RSYNCBITS is the number of bits in the mask for
396cf2fd8adStls comparison. For random input data, there will be a hit on average every
397cf2fd8adStls 1<<RSYNCBITS bytes. So for an RSYNCBITS of 12, there will be an average of
398cf2fd8adStls one hit every 4096 bytes, resulting in a mean block size of 4096. RSYNCMASK
399cf2fd8adStls is the resulting bit mask. RSYNCHIT is what the hash value is compared to
400cf2fd8adStls after applying the mask.
401cf2fd8adStls
402cf2fd8adStls The choice of 12 for RSYNCBITS is consistent with the original rsyncable
403cf2fd8adStls patch for gzip which also uses a 12-bit mask. This results in a relatively
404cf2fd8adStls small hit to compression, on the order of 1.5% to 3%. A mask of 13 bits can
405cf2fd8adStls be used instead if a hit of less than 1% to the compression is desired, at
406cf2fd8adStls the expense of more blocks transmitted for rsync updates. (Your mileage may
407cf2fd8adStls vary.)
408cf2fd8adStls
409cf2fd8adStls This implementation of rsyncable uses a different hash algorithm than what
410cf2fd8adStls the gzip rsyncable patch uses in order to provide better performance in
411cf2fd8adStls several regards. The algorithm is simply to shift the hash value left one
412cf2fd8adStls bit and exclusive-or that with the next byte. This is masked to the number
413cf2fd8adStls of hash bits (RSYNCMASK) and compared to all ones except for a zero in the
414cf2fd8adStls top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes
415cf2fd8adStls (RSYNCBITS+7). The small window provides the benefit of much more rapid
416cf2fd8adStls resynchronization after a change, than does the 4096-byte window of the gzip
417cf2fd8adStls rsyncable patch.
418cf2fd8adStls
419cf2fd8adStls The comparison value is chosen to avoid matching any repeated bytes or short
420cf2fd8adStls sequences. The gzip rsyncable patch on the other hand uses a sum and zero
421cf2fd8adStls for comparison, which results in certain bad behaviors, such as always
422cf2fd8adStls matching everywhere in a long sequence of zeros. Such sequences occur
423cf2fd8adStls frequently in tar files.
424cf2fd8adStls
425cf2fd8adStls This hash efficiently discards history older than 19 bytes simply by
426cf2fd8adStls shifting that data past the top of the mask -- no history needs to be
427cf2fd8adStls retained to undo its impact on the hash value, as is needed for a sum.
428cf2fd8adStls
429cf2fd8adStls The choice of the comparison value (RSYNCHIT) has the virtue of avoiding
430cf2fd8adStls extremely short blocks. The shortest block is five bytes (RSYNCBITS-7) from
431cf2fd8adStls hit to hit, and is unlikely. Whereas with the gzip rsyncable algorithm,
432cf2fd8adStls blocks of one byte are not only possible, but in fact are the most likely
433cf2fd8adStls block size.
434cf2fd8adStls
435cf2fd8adStls Thanks and acknowledgement to Kevin Day for his experimentation and insights
436cf2fd8adStls on rsyncable hash characteristics that led to some of the choices here.
437cf2fd8adStls */
438cf2fd8adStls #define RSYNCBITS 12
439cf2fd8adStls #define RSYNCMASK ((1U << RSYNCBITS) - 1)
440cf2fd8adStls #define RSYNCHIT (RSYNCMASK >> 1)
441cf2fd8adStls
442cf2fd8adStls /* initial pool counts and sizes -- INBUFS is the limit on the number of input
443cf2fd8adStls spaces as a function of the number of processors (used to throttle the
444cf2fd8adStls creation of compression jobs), OUTPOOL is the initial size of the output
445cf2fd8adStls data buffer, chosen to make resizing of the buffer very unlikely and to
446cf2fd8adStls allow prepending with a dictionary for use as an input buffer for zopfli */
447cf2fd8adStls #define INBUFS(p) (((p)<<1)+3)
448cf2fd8adStls #define OUTPOOL(s) ((s)+((s)>>4)+DICT)
449cf2fd8adStls
450cf2fd8adStls /* input buffer size */
451cf2fd8adStls #define BUF 32768U
452cf2fd8adStls
453e25fdb51Smrg /* globals (modified by main thread only when it's the only thread) */
454cf2fd8adStls local struct {
455cf2fd8adStls char *prog; /* name by which pigz was invoked */
456cf2fd8adStls int ind; /* input file descriptor */
457cf2fd8adStls int outd; /* output file descriptor */
458cf2fd8adStls char inf[PATH_MAX+1]; /* input file name (accommodate recursion) */
459cf2fd8adStls char *outf; /* output file name (allocated if not NULL) */
460cf2fd8adStls int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */
461cf2fd8adStls int headis; /* 1 to store name, 2 to store date, 3 both */
462cf2fd8adStls int pipeout; /* write output to stdout even if file */
463cf2fd8adStls int keep; /* true to prevent deletion of input file */
464cf2fd8adStls int force; /* true to overwrite, compress links, cat */
465cf2fd8adStls int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */
466cf2fd8adStls unsigned char magic1; /* first byte of possible header when decoding */
467cf2fd8adStls int recurse; /* true to dive down into directory structure */
468cf2fd8adStls char *sufx; /* suffix to use (".gz" or user supplied) */
469cf2fd8adStls char *name; /* name for gzip header */
470cf2fd8adStls time_t mtime; /* time stamp from input file for gzip header */
471cf2fd8adStls int list; /* true to list files instead of compress */
472cf2fd8adStls int first; /* true if we need to print listing header */
473cf2fd8adStls int decode; /* 0 to compress, 1 to decompress, 2 to test */
474cf2fd8adStls int level; /* compression level */
475cf2fd8adStls ZopfliOptions zopts; /* zopfli compression options */
476cf2fd8adStls int rsync; /* true for rsync blocking */
477cf2fd8adStls int procs; /* maximum number of compression threads (>= 1) */
478cf2fd8adStls int setdict; /* true to initialize dictionary in each thread */
479cf2fd8adStls size_t block; /* uncompressed input size per thread (>= 32K) */
480cf2fd8adStls int warned; /* true if a warning has been given */
481e25fdb51Smrg
482e25fdb51Smrg /* saved gzip/zip header data for decompression, testing, and listing */
483cf2fd8adStls time_t stamp; /* time stamp from gzip header */
484cf2fd8adStls char *hname; /* name from header (allocated) */
485cf2fd8adStls unsigned long zip_crc; /* local header crc */
486cf2fd8adStls unsigned long zip_clen; /* local header compressed length */
487cf2fd8adStls unsigned long zip_ulen; /* local header uncompressed length */
488cf2fd8adStls
489cf2fd8adStls /* globals for decompression and listing buffered reading */
490cf2fd8adStls unsigned char in_buf[BUF]; /* input buffer */
491cf2fd8adStls unsigned char *in_next; /* next unused byte in buffer */
492cf2fd8adStls size_t in_left; /* number of unused bytes in buffer */
493cf2fd8adStls int in_eof; /* true if reached end of file on input */
494cf2fd8adStls int in_short; /* true if last read didn't fill buffer */
495cf2fd8adStls off_t in_tot; /* total bytes read from input */
496cf2fd8adStls off_t out_tot; /* total bytes written to output */
497cf2fd8adStls unsigned long out_check; /* check value of output */
498cf2fd8adStls
499cf2fd8adStls #ifndef NOTHREAD
500cf2fd8adStls /* globals for decompression parallel reading */
501cf2fd8adStls unsigned char in_buf2[BUF]; /* second buffer for parallel reads */
502cf2fd8adStls size_t in_len; /* data waiting in next buffer */
503cf2fd8adStls int in_which; /* -1: start, 0: in_buf2, 1: in_buf */
504cf2fd8adStls lock *load_state; /* value = 0 to wait, 1 to read a buffer */
505cf2fd8adStls thread *load_thread; /* load_read() thread for joining */
506cf2fd8adStls #endif
507cf2fd8adStls } g;
508cf2fd8adStls
509cf2fd8adStls /* display a complaint with the program name on stderr */
complain(char * fmt,...)510cf2fd8adStls local int complain(char *fmt, ...)
511cf2fd8adStls {
512cf2fd8adStls va_list ap;
513cf2fd8adStls
514cf2fd8adStls if (g.verbosity > 0) {
515cf2fd8adStls fprintf(stderr, "%s: ", g.prog);
516cf2fd8adStls va_start(ap, fmt);
517cf2fd8adStls vfprintf(stderr, fmt, ap);
518cf2fd8adStls va_end(ap);
519cf2fd8adStls putc('\n', stderr);
520cf2fd8adStls fflush(stderr);
521cf2fd8adStls g.warned = 1;
522cf2fd8adStls }
523cf2fd8adStls return 0;
524cf2fd8adStls }
525e25fdb51Smrg
526e25fdb51Smrg /* exit with error, delete output file if in the middle of writing it */
bail(char * why,char * what)527e25fdb51Smrg local int bail(char *why, char *what)
528e25fdb51Smrg {
529cf2fd8adStls if (g.outd != -1 && g.outf != NULL)
530cf2fd8adStls unlink(g.outf);
531cf2fd8adStls complain("abort: %s%s", why, what);
532e25fdb51Smrg exit(1);
533e25fdb51Smrg return 0;
534e25fdb51Smrg }
535e25fdb51Smrg
536e25fdb51Smrg #ifdef DEBUG
537e25fdb51Smrg
538cf2fd8adStls /* memory tracking */
539cf2fd8adStls
540cf2fd8adStls local struct mem_track_s {
541cf2fd8adStls size_t num; /* current number of allocations */
542cf2fd8adStls size_t size; /* total size of current allocations */
543cf2fd8adStls size_t max; /* maximum size of allocations */
544cf2fd8adStls #ifndef NOTHREAD
545cf2fd8adStls lock *lock; /* lock for access across threads */
546cf2fd8adStls #endif
547cf2fd8adStls } mem_track;
548cf2fd8adStls
549cf2fd8adStls #ifndef NOTHREAD
550cf2fd8adStls # define mem_track_grab(m) possess((m)->lock)
551cf2fd8adStls # define mem_track_drop(m) release((m)->lock)
552cf2fd8adStls #else
553cf2fd8adStls # define mem_track_grab(m)
554cf2fd8adStls # define mem_track_drop(m)
555cf2fd8adStls #endif
556cf2fd8adStls
malloc_track(struct mem_track_s * mem,size_t size)557cf2fd8adStls local void *malloc_track(struct mem_track_s *mem, size_t size)
558cf2fd8adStls {
559cf2fd8adStls void *ptr;
560cf2fd8adStls
561cf2fd8adStls ptr = malloc(size);
562cf2fd8adStls if (ptr != NULL) {
563cf2fd8adStls size = MALLOC_SIZE(ptr);
564cf2fd8adStls mem_track_grab(mem);
565cf2fd8adStls mem->num++;
566cf2fd8adStls mem->size += size;
567cf2fd8adStls if (mem->size > mem->max)
568cf2fd8adStls mem->max = mem->size;
569cf2fd8adStls mem_track_drop(mem);
570cf2fd8adStls }
571cf2fd8adStls return ptr;
572cf2fd8adStls }
573cf2fd8adStls
realloc_track(struct mem_track_s * mem,void * ptr,size_t size)574cf2fd8adStls local void *realloc_track(struct mem_track_s *mem, void *ptr, size_t size)
575cf2fd8adStls {
576cf2fd8adStls size_t was;
577cf2fd8adStls
578cf2fd8adStls if (ptr == NULL)
579cf2fd8adStls return malloc_track(mem, size);
580cf2fd8adStls was = MALLOC_SIZE(ptr);
581cf2fd8adStls ptr = realloc(ptr, size);
582cf2fd8adStls if (ptr != NULL) {
583cf2fd8adStls size = MALLOC_SIZE(ptr);
584cf2fd8adStls mem_track_grab(mem);
585cf2fd8adStls mem->size -= was;
586cf2fd8adStls mem->size += size;
587cf2fd8adStls if (mem->size > mem->max)
588cf2fd8adStls mem->max = mem->size;
589cf2fd8adStls mem_track_drop(mem);
590cf2fd8adStls }
591cf2fd8adStls return ptr;
592cf2fd8adStls }
593cf2fd8adStls
free_track(struct mem_track_s * mem,void * ptr)594cf2fd8adStls local void free_track(struct mem_track_s *mem, void *ptr)
595cf2fd8adStls {
596cf2fd8adStls size_t size;
597cf2fd8adStls
598cf2fd8adStls if (ptr != NULL) {
599cf2fd8adStls size = MALLOC_SIZE(ptr);
600cf2fd8adStls mem_track_grab(mem);
601cf2fd8adStls mem->num--;
602cf2fd8adStls mem->size -= size;
603cf2fd8adStls mem_track_drop(mem);
604cf2fd8adStls free(ptr);
605cf2fd8adStls }
606cf2fd8adStls }
607cf2fd8adStls
608cf2fd8adStls #ifndef NOTHREAD
yarn_malloc(size_t size)609cf2fd8adStls local void *yarn_malloc(size_t size)
610cf2fd8adStls {
611cf2fd8adStls return malloc_track(&mem_track, size);
612cf2fd8adStls }
613cf2fd8adStls
yarn_free(void * ptr)614cf2fd8adStls local void yarn_free(void *ptr)
615cf2fd8adStls {
616cf2fd8adStls return free_track(&mem_track, ptr);
617cf2fd8adStls }
618cf2fd8adStls #endif
619cf2fd8adStls
zlib_alloc(voidpf opaque,uInt items,uInt size)620cf2fd8adStls local voidpf zlib_alloc(voidpf opaque, uInt items, uInt size)
621cf2fd8adStls {
622cf2fd8adStls return malloc_track(opaque, items * (size_t)size);
623cf2fd8adStls }
624cf2fd8adStls
zlib_free(voidpf opaque,voidpf address)625cf2fd8adStls local void zlib_free(voidpf opaque, voidpf address)
626cf2fd8adStls {
627cf2fd8adStls free_track(opaque, address);
628cf2fd8adStls }
629cf2fd8adStls
/* DEBUG build: route all allocation through the tracking routines so that
   usage and leaks can be reported, and give zlib the tracking hooks along
   with the global tracker as their opaque argument */
#define MALLOC(s) malloc_track(&mem_track, s)
#define REALLOC(p, s) realloc_track(&mem_track, p, s)
#define FREE(p) free_track(&mem_track, p)
#define OPAQUE (&mem_track)
#define ZALLOC zlib_alloc
#define ZFREE zlib_free
636cf2fd8adStls
637e25fdb51Smrg /* starting time of day for tracing */
638e25fdb51Smrg local struct timeval start;
639e25fdb51Smrg
640e25fdb51Smrg /* trace log */
641e25fdb51Smrg local struct log {
642e25fdb51Smrg struct timeval when; /* time of entry */
643e25fdb51Smrg char *msg; /* message */
644e25fdb51Smrg struct log *next; /* next entry */
645e25fdb51Smrg } *log_head, **log_tail = NULL;
646e25fdb51Smrg #ifndef NOTHREAD
647e25fdb51Smrg local lock *log_lock = NULL;
648e25fdb51Smrg #endif
649e25fdb51Smrg
650e25fdb51Smrg /* maximum log entry length */
651e25fdb51Smrg #define MAXMSG 256
652e25fdb51Smrg
653e25fdb51Smrg /* set up log (call from main thread before other threads launched) */
log_init(void)654e25fdb51Smrg local void log_init(void)
655e25fdb51Smrg {
656e25fdb51Smrg if (log_tail == NULL) {
657cf2fd8adStls mem_track.num = 0;
658cf2fd8adStls mem_track.size = 0;
659cf2fd8adStls mem_track.max = 0;
660e25fdb51Smrg #ifndef NOTHREAD
661cf2fd8adStls mem_track.lock = new_lock(0);
662cf2fd8adStls yarn_mem(yarn_malloc, yarn_free);
663e25fdb51Smrg log_lock = new_lock(0);
664e25fdb51Smrg #endif
665e25fdb51Smrg log_head = NULL;
666e25fdb51Smrg log_tail = &log_head;
667e25fdb51Smrg }
668e25fdb51Smrg }
669e25fdb51Smrg
670e25fdb51Smrg /* add entry to trace log */
log_add(char * fmt,...)671e25fdb51Smrg local void log_add(char *fmt, ...)
672e25fdb51Smrg {
673e25fdb51Smrg struct timeval now;
674e25fdb51Smrg struct log *me;
675e25fdb51Smrg va_list ap;
676e25fdb51Smrg char msg[MAXMSG];
677e25fdb51Smrg
678e25fdb51Smrg gettimeofday(&now, NULL);
679cf2fd8adStls me = MALLOC(sizeof(struct log));
680e25fdb51Smrg if (me == NULL)
681e25fdb51Smrg bail("not enough memory", "");
682e25fdb51Smrg me->when = now;
683e25fdb51Smrg va_start(ap, fmt);
684e25fdb51Smrg vsnprintf(msg, MAXMSG, fmt, ap);
685e25fdb51Smrg va_end(ap);
686cf2fd8adStls me->msg = MALLOC(strlen(msg) + 1);
687e25fdb51Smrg if (me->msg == NULL) {
688cf2fd8adStls FREE(me);
689e25fdb51Smrg bail("not enough memory", "");
690e25fdb51Smrg }
691e25fdb51Smrg strcpy(me->msg, msg);
692e25fdb51Smrg me->next = NULL;
693e25fdb51Smrg #ifndef NOTHREAD
694e25fdb51Smrg assert(log_lock != NULL);
695e25fdb51Smrg possess(log_lock);
696e25fdb51Smrg #endif
697e25fdb51Smrg *log_tail = me;
698e25fdb51Smrg log_tail = &(me->next);
699e25fdb51Smrg #ifndef NOTHREAD
700e25fdb51Smrg twist(log_lock, BY, +1);
701e25fdb51Smrg #endif
702e25fdb51Smrg }
703e25fdb51Smrg
704e25fdb51Smrg /* pull entry from trace log and print it, return false if empty */
log_show(void)705e25fdb51Smrg local int log_show(void)
706e25fdb51Smrg {
707e25fdb51Smrg struct log *me;
708e25fdb51Smrg struct timeval diff;
709e25fdb51Smrg
710e25fdb51Smrg if (log_tail == NULL)
711e25fdb51Smrg return 0;
712e25fdb51Smrg #ifndef NOTHREAD
713e25fdb51Smrg possess(log_lock);
714e25fdb51Smrg #endif
715e25fdb51Smrg me = log_head;
716e25fdb51Smrg if (me == NULL) {
717e25fdb51Smrg #ifndef NOTHREAD
718e25fdb51Smrg release(log_lock);
719e25fdb51Smrg #endif
720e25fdb51Smrg return 0;
721e25fdb51Smrg }
722e25fdb51Smrg log_head = me->next;
723e25fdb51Smrg if (me->next == NULL)
724e25fdb51Smrg log_tail = &log_head;
725e25fdb51Smrg #ifndef NOTHREAD
726e25fdb51Smrg twist(log_lock, BY, -1);
727e25fdb51Smrg #endif
728e25fdb51Smrg diff.tv_usec = me->when.tv_usec - start.tv_usec;
729e25fdb51Smrg diff.tv_sec = me->when.tv_sec - start.tv_sec;
730e25fdb51Smrg if (diff.tv_usec < 0) {
731e25fdb51Smrg diff.tv_usec += 1000000L;
732e25fdb51Smrg diff.tv_sec--;
733e25fdb51Smrg }
734e25fdb51Smrg fprintf(stderr, "trace %ld.%06ld %s\n",
735e25fdb51Smrg (long)diff.tv_sec, (long)diff.tv_usec, me->msg);
736e25fdb51Smrg fflush(stderr);
737cf2fd8adStls FREE(me->msg);
738cf2fd8adStls FREE(me);
739e25fdb51Smrg return 1;
740e25fdb51Smrg }
741e25fdb51Smrg
742e25fdb51Smrg /* release log resources (need to do log_init() to use again) */
log_free(void)743e25fdb51Smrg local void log_free(void)
744e25fdb51Smrg {
745e25fdb51Smrg struct log *me;
746e25fdb51Smrg
747e25fdb51Smrg if (log_tail != NULL) {
748e25fdb51Smrg #ifndef NOTHREAD
749e25fdb51Smrg possess(log_lock);
750e25fdb51Smrg #endif
751e25fdb51Smrg while ((me = log_head) != NULL) {
752e25fdb51Smrg log_head = me->next;
753cf2fd8adStls FREE(me->msg);
754cf2fd8adStls FREE(me);
755e25fdb51Smrg }
756e25fdb51Smrg #ifndef NOTHREAD
757e25fdb51Smrg twist(log_lock, TO, 0);
758e25fdb51Smrg free_lock(log_lock);
759e25fdb51Smrg log_lock = NULL;
760cf2fd8adStls yarn_mem(malloc, free);
761cf2fd8adStls free_lock(mem_track.lock);
762e25fdb51Smrg #endif
763e25fdb51Smrg log_tail = NULL;
764e25fdb51Smrg }
765e25fdb51Smrg }
766e25fdb51Smrg
767e25fdb51Smrg /* show entries until no more, free log */
log_dump(void)768e25fdb51Smrg local void log_dump(void)
769e25fdb51Smrg {
770e25fdb51Smrg if (log_tail == NULL)
771e25fdb51Smrg return;
772e25fdb51Smrg while (log_show())
773e25fdb51Smrg ;
774e25fdb51Smrg log_free();
775cf2fd8adStls if (mem_track.num || mem_track.size)
776cf2fd8adStls complain("memory leak: %lu allocs of %lu bytes total",
777cf2fd8adStls mem_track.num, mem_track.size);
778cf2fd8adStls if (mem_track.max)
779cf2fd8adStls fprintf(stderr, "%lu bytes of memory used\n", mem_track.max);
780e25fdb51Smrg }
781e25fdb51Smrg
/* debugging macro -- x is a complete, parenthesized log_add() argument list,
   e.g. Trace(("read %d bytes", n)); the entry is only recorded when
   g.verbosity is greater than 2 */
#define Trace(x) \
    do { \
        if (g.verbosity > 2) \
            log_add x; \
    } while (0)
789e25fdb51Smrg
790e25fdb51Smrg #else /* !DEBUG */
791e25fdb51Smrg
/* non-DEBUG build: use the C library allocator directly and let zlib use its
   default allocation routines */
#define MALLOC malloc
#define REALLOC realloc
#define FREE free
#define OPAQUE Z_NULL
#define ZALLOC Z_NULL
#define ZFREE Z_NULL

/* tracing compiles away to nothing */
#define log_dump()
#define Trace(x)
801e25fdb51Smrg
802e25fdb51Smrg #endif
803e25fdb51Smrg
804e25fdb51Smrg /* read up to len bytes into buf, repeating read() calls as needed */
readn(int desc,unsigned char * buf,size_t len)805e25fdb51Smrg local size_t readn(int desc, unsigned char *buf, size_t len)
806e25fdb51Smrg {
807e25fdb51Smrg ssize_t ret;
808e25fdb51Smrg size_t got;
809e25fdb51Smrg
810e25fdb51Smrg got = 0;
811e25fdb51Smrg while (len) {
812e25fdb51Smrg ret = read(desc, buf, len);
813e25fdb51Smrg if (ret < 0)
814cf2fd8adStls bail("read error on ", g.inf);
815e25fdb51Smrg if (ret == 0)
816e25fdb51Smrg break;
817e25fdb51Smrg buf += ret;
818e25fdb51Smrg len -= ret;
819e25fdb51Smrg got += ret;
820e25fdb51Smrg }
821e25fdb51Smrg return got;
822e25fdb51Smrg }
823e25fdb51Smrg
824e25fdb51Smrg /* write len bytes, repeating write() calls as needed */
writen(int desc,unsigned char * buf,size_t len)825e25fdb51Smrg local void writen(int desc, unsigned char *buf, size_t len)
826e25fdb51Smrg {
827e25fdb51Smrg ssize_t ret;
828e25fdb51Smrg
829e25fdb51Smrg while (len) {
830e25fdb51Smrg ret = write(desc, buf, len);
831cf2fd8adStls if (ret < 1) {
832cf2fd8adStls complain("write error code %d", errno);
833cf2fd8adStls bail("write error on ", g.outf);
834cf2fd8adStls }
835e25fdb51Smrg buf += ret;
836e25fdb51Smrg len -= ret;
837e25fdb51Smrg }
838e25fdb51Smrg }
839e25fdb51Smrg
/* Convert Unix time t to a packed MS-DOS date and time, using the current
   time zone (localtime()).  A t of zero means "now".  The result holds, from
   the top down: years since 1980 (7 bits), month 1..12 (4 bits), day of the
   month (5 bits), hour (5 bits), minute (6 bits), and seconds divided by two,
   rounded (5 bits).  Returns 0 if the time cannot be broken down or if the
   year is outside 1980..2107, the range the 7-bit year field can hold. */
local unsigned long time2dos(time_t t)
{
    struct tm *tm;
    unsigned long dos;

    if (t == 0)
        t = time(NULL);
    tm = localtime(&t);

    /* localtime() returns NULL when it cannot represent the time */
    if (tm == NULL || tm->tm_year < 80 || tm->tm_year > 207)
        return 0;

    /* shift as unsigned long: years from 2044 on set bit 31, which would
       overflow a 32-bit int */
    dos = (unsigned long)(tm->tm_year - 80) << 25;
    dos += (unsigned long)(tm->tm_mon + 1) << 21;
    dos += (unsigned long)tm->tm_mday << 16;
    dos += (unsigned long)tm->tm_hour << 11;
    dos += (unsigned long)tm->tm_min << 5;
    dos += (unsigned long)(tm->tm_sec + 1) >> 1;    /* round to double-seconds */
    return dos;
}
860e25fdb51Smrg
/* store a 2-byte or 4-byte integer into a byte array: PUT2L and PUT4L store
   least significant byte first, PUT4M stores most significant byte first --
   note that both arguments are expanded more than once, so they should not
   have side effects or be costly to evaluate */
#define PUT2L(a,b) ((a)[0] = (b) & 0xff, (a)[1] = (b) >> 8)
#define PUT4L(a,b) (PUT2L(a, (b) & 0xffff), PUT2L((a) + 2, (b) >> 16))
#define PUT4M(a,b) \
    ((a)[0] = (b) >> 24, (a)[1] = (b) >> 16, (a)[2] = (b) >> 8, (a)[3] = (b))
865e25fdb51Smrg
866e25fdb51Smrg /* write a gzip, zlib, or zip header using the information in the globals */
put_header(void)867e25fdb51Smrg local unsigned long put_header(void)
868e25fdb51Smrg {
869e25fdb51Smrg unsigned long len;
870e25fdb51Smrg unsigned char head[30];
871e25fdb51Smrg
872cf2fd8adStls if (g.form > 1) { /* zip */
873e25fdb51Smrg /* write local header */
874e25fdb51Smrg PUT4L(head, 0x04034b50UL); /* local header signature */
875e25fdb51Smrg PUT2L(head + 4, 20); /* version needed to extract (2.0) */
876e25fdb51Smrg PUT2L(head + 6, 8); /* flags: data descriptor follows data */
877e25fdb51Smrg PUT2L(head + 8, 8); /* deflate */
878cf2fd8adStls PUT4L(head + 10, time2dos(g.mtime));
879e25fdb51Smrg PUT4L(head + 14, 0); /* crc (not here) */
880e25fdb51Smrg PUT4L(head + 18, 0); /* compressed length (not here) */
881e25fdb51Smrg PUT4L(head + 22, 0); /* uncompressed length (not here) */
882cf2fd8adStls PUT2L(head + 26, g.name == NULL ? 1 : /* length of name */
883cf2fd8adStls strlen(g.name));
884e25fdb51Smrg PUT2L(head + 28, 9); /* length of extra field (see below) */
885cf2fd8adStls writen(g.outd, head, 30); /* write local header */
886e25fdb51Smrg len = 30;
887e25fdb51Smrg
888e25fdb51Smrg /* write file name (use "-" for stdin) */
889cf2fd8adStls if (g.name == NULL)
890cf2fd8adStls writen(g.outd, (unsigned char *)"-", 1);
891e25fdb51Smrg else
892cf2fd8adStls writen(g.outd, (unsigned char *)g.name, strlen(g.name));
893cf2fd8adStls len += g.name == NULL ? 1 : strlen(g.name);
894e25fdb51Smrg
895e25fdb51Smrg /* write extended timestamp extra field block (9 bytes) */
896e25fdb51Smrg PUT2L(head, 0x5455); /* extended timestamp signature */
897e25fdb51Smrg PUT2L(head + 2, 5); /* number of data bytes in this block */
898e25fdb51Smrg head[4] = 1; /* flag presence of mod time */
899cf2fd8adStls PUT4L(head + 5, g.mtime); /* mod time */
900cf2fd8adStls writen(g.outd, head, 9); /* write extra field block */
901e25fdb51Smrg len += 9;
902e25fdb51Smrg }
903cf2fd8adStls else if (g.form) { /* zlib */
904e25fdb51Smrg head[0] = 0x78; /* deflate, 32K window */
905cf2fd8adStls head[1] = (g.level >= 9 ? 3 :
906cf2fd8adStls (g.level == 1 ? 0 :
907cf2fd8adStls (g.level >= 6 || g.level == Z_DEFAULT_COMPRESSION ?
908cf2fd8adStls 1 : 2))) << 6;
909e25fdb51Smrg head[1] += 31 - (((head[0] << 8) + head[1]) % 31);
910cf2fd8adStls writen(g.outd, head, 2);
911e25fdb51Smrg len = 2;
912e25fdb51Smrg }
913e25fdb51Smrg else { /* gzip */
914e25fdb51Smrg head[0] = 31;
915e25fdb51Smrg head[1] = 139;
916e25fdb51Smrg head[2] = 8; /* deflate */
917cf2fd8adStls head[3] = g.name != NULL ? 8 : 0;
918cf2fd8adStls PUT4L(head + 4, g.mtime);
919cf2fd8adStls head[8] = g.level >= 9 ? 2 : (g.level == 1 ? 4 : 0);
920e25fdb51Smrg head[9] = 3; /* unix */
921cf2fd8adStls writen(g.outd, head, 10);
922e25fdb51Smrg len = 10;
923cf2fd8adStls if (g.name != NULL)
924cf2fd8adStls writen(g.outd, (unsigned char *)g.name, strlen(g.name) + 1);
925cf2fd8adStls if (g.name != NULL)
926cf2fd8adStls len += strlen(g.name) + 1;
927e25fdb51Smrg }
928e25fdb51Smrg return len;
929e25fdb51Smrg }
930e25fdb51Smrg
931e25fdb51Smrg /* write a gzip, zlib, or zip trailer */
put_trailer(unsigned long ulen,unsigned long clen,unsigned long check,unsigned long head)932e25fdb51Smrg local void put_trailer(unsigned long ulen, unsigned long clen,
933e25fdb51Smrg unsigned long check, unsigned long head)
934e25fdb51Smrg {
935e25fdb51Smrg unsigned char tail[46];
936e25fdb51Smrg
937cf2fd8adStls if (g.form > 1) { /* zip */
938e25fdb51Smrg unsigned long cent;
939e25fdb51Smrg
940e25fdb51Smrg /* write data descriptor (as promised in local header) */
941cf2fd8adStls PUT4L(tail, 0x08074b50UL);
942cf2fd8adStls PUT4L(tail + 4, check);
943cf2fd8adStls PUT4L(tail + 8, clen);
944cf2fd8adStls PUT4L(tail + 12, ulen);
945cf2fd8adStls writen(g.outd, tail, 16);
946e25fdb51Smrg
947e25fdb51Smrg /* write central file header */
948e25fdb51Smrg PUT4L(tail, 0x02014b50UL); /* central header signature */
949e25fdb51Smrg tail[4] = 63; /* obeyed version 6.3 of the zip spec */
950e25fdb51Smrg tail[5] = 255; /* ignore external attributes */
951e25fdb51Smrg PUT2L(tail + 6, 20); /* version needed to extract (2.0) */
952e25fdb51Smrg PUT2L(tail + 8, 8); /* data descriptor is present */
953e25fdb51Smrg PUT2L(tail + 10, 8); /* deflate */
954cf2fd8adStls PUT4L(tail + 12, time2dos(g.mtime));
955e25fdb51Smrg PUT4L(tail + 16, check); /* crc */
956e25fdb51Smrg PUT4L(tail + 20, clen); /* compressed length */
957e25fdb51Smrg PUT4L(tail + 24, ulen); /* uncompressed length */
958cf2fd8adStls PUT2L(tail + 28, g.name == NULL ? 1 : /* length of name */
959cf2fd8adStls strlen(g.name));
960e25fdb51Smrg PUT2L(tail + 30, 9); /* length of extra field (see below) */
961e25fdb51Smrg PUT2L(tail + 32, 0); /* no file comment */
962e25fdb51Smrg PUT2L(tail + 34, 0); /* disk number 0 */
963e25fdb51Smrg PUT2L(tail + 36, 0); /* internal file attributes */
964e25fdb51Smrg PUT4L(tail + 38, 0); /* external file attributes (ignored) */
965e25fdb51Smrg PUT4L(tail + 42, 0); /* offset of local header */
966cf2fd8adStls writen(g.outd, tail, 46); /* write central file header */
967e25fdb51Smrg cent = 46;
968e25fdb51Smrg
969e25fdb51Smrg /* write file name (use "-" for stdin) */
970cf2fd8adStls if (g.name == NULL)
971cf2fd8adStls writen(g.outd, (unsigned char *)"-", 1);
972e25fdb51Smrg else
973cf2fd8adStls writen(g.outd, (unsigned char *)g.name, strlen(g.name));
974cf2fd8adStls cent += g.name == NULL ? 1 : strlen(g.name);
975e25fdb51Smrg
976e25fdb51Smrg /* write extended timestamp extra field block (9 bytes) */
977e25fdb51Smrg PUT2L(tail, 0x5455); /* extended timestamp signature */
978e25fdb51Smrg PUT2L(tail + 2, 5); /* number of data bytes in this block */
979e25fdb51Smrg tail[4] = 1; /* flag presence of mod time */
980cf2fd8adStls PUT4L(tail + 5, g.mtime); /* mod time */
981cf2fd8adStls writen(g.outd, tail, 9); /* write extra field block */
982e25fdb51Smrg cent += 9;
983e25fdb51Smrg
984e25fdb51Smrg /* write end of central directory record */
985e25fdb51Smrg PUT4L(tail, 0x06054b50UL); /* end of central directory signature */
986e25fdb51Smrg PUT2L(tail + 4, 0); /* number of this disk */
987e25fdb51Smrg PUT2L(tail + 6, 0); /* disk with start of central directory */
988e25fdb51Smrg PUT2L(tail + 8, 1); /* number of entries on this disk */
989e25fdb51Smrg PUT2L(tail + 10, 1); /* total number of entries */
990e25fdb51Smrg PUT4L(tail + 12, cent); /* size of central directory */
991cf2fd8adStls PUT4L(tail + 16, head + clen + 16); /* offset of central directory */
992e25fdb51Smrg PUT2L(tail + 20, 0); /* no zip file comment */
993cf2fd8adStls writen(g.outd, tail, 22); /* write end of central directory record */
994e25fdb51Smrg }
995cf2fd8adStls else if (g.form) { /* zlib */
996e25fdb51Smrg PUT4M(tail, check);
997cf2fd8adStls writen(g.outd, tail, 4);
998e25fdb51Smrg }
999e25fdb51Smrg else { /* gzip */
1000e25fdb51Smrg PUT4L(tail, check);
1001e25fdb51Smrg PUT4L(tail + 4, ulen);
1002cf2fd8adStls writen(g.outd, tail, 8);
1003e25fdb51Smrg }
1004e25fdb51Smrg }
1005e25fdb51Smrg
/* compute check value depending on format: adler32() when g.form is 1 (the
   zlib format), crc32() otherwise -- the arguments are passed through
   unchanged */
#define CHECK(a,b,c) (g.form == 1 ? adler32(a,b,c) : crc32(a,b,c))
1008e25fdb51Smrg
1009e25fdb51Smrg #ifndef NOTHREAD
1010e25fdb51Smrg /* -- threaded portions of pigz -- */
1011e25fdb51Smrg
1012e25fdb51Smrg /* -- check value combination routines for parallel calculation -- */
1013e25fdb51Smrg
/* combine two crc-32's or two adler-32's, picking the routine that matches
   the check used for the format (adler-32 when g.form is 1, crc-32
   otherwise) -- the combination routines below are copied from zlib 1.2.3 so
   that pigz can be compatible with older versions of zlib, and in order to
   avoid linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and
   others */
#define COMB(a,b,c) (g.form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c))
1020e25fdb51Smrg
/* Multiply the matrix mat by the vector vec over GF(2): exclusive-or together
   the rows of mat selected by the one bits of vec, where bit n of vec selects
   mat[n].  mat must have an entry for every bit position up to the highest
   one bit in vec. */
local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec)
{
    unsigned long acc = 0;
    unsigned long *row;

    for (row = mat; vec != 0; vec >>= 1, row++)
        if (vec & 1)
            acc ^= *row;
    return acc;
}
1034e25fdb51Smrg
gf2_matrix_square(unsigned long * square,unsigned long * mat)1035e25fdb51Smrg local void gf2_matrix_square(unsigned long *square, unsigned long *mat)
1036e25fdb51Smrg {
1037e25fdb51Smrg int n;
1038e25fdb51Smrg
1039e25fdb51Smrg for (n = 0; n < 32; n++)
1040e25fdb51Smrg square[n] = gf2_matrix_times(mat, mat[n]);
1041e25fdb51Smrg }
1042e25fdb51Smrg
crc32_comb(unsigned long crc1,unsigned long crc2,size_t len2)1043e25fdb51Smrg local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2,
1044e25fdb51Smrg size_t len2)
1045e25fdb51Smrg {
1046e25fdb51Smrg int n;
1047e25fdb51Smrg unsigned long row;
1048e25fdb51Smrg unsigned long even[32]; /* even-power-of-two zeros operator */
1049e25fdb51Smrg unsigned long odd[32]; /* odd-power-of-two zeros operator */
1050e25fdb51Smrg
1051e25fdb51Smrg /* degenerate case */
1052e25fdb51Smrg if (len2 == 0)
1053e25fdb51Smrg return crc1;
1054e25fdb51Smrg
1055e25fdb51Smrg /* put operator for one zero bit in odd */
1056e25fdb51Smrg odd[0] = 0xedb88320UL; /* CRC-32 polynomial */
1057e25fdb51Smrg row = 1;
1058e25fdb51Smrg for (n = 1; n < 32; n++) {
1059e25fdb51Smrg odd[n] = row;
1060e25fdb51Smrg row <<= 1;
1061e25fdb51Smrg }
1062e25fdb51Smrg
1063e25fdb51Smrg /* put operator for two zero bits in even */
1064e25fdb51Smrg gf2_matrix_square(even, odd);
1065e25fdb51Smrg
1066e25fdb51Smrg /* put operator for four zero bits in odd */
1067e25fdb51Smrg gf2_matrix_square(odd, even);
1068e25fdb51Smrg
1069e25fdb51Smrg /* apply len2 zeros to crc1 (first square will put the operator for one
1070e25fdb51Smrg zero byte, eight zero bits, in even) */
1071e25fdb51Smrg do {
1072e25fdb51Smrg /* apply zeros operator for this bit of len2 */
1073e25fdb51Smrg gf2_matrix_square(even, odd);
1074e25fdb51Smrg if (len2 & 1)
1075e25fdb51Smrg crc1 = gf2_matrix_times(even, crc1);
1076e25fdb51Smrg len2 >>= 1;
1077e25fdb51Smrg
1078e25fdb51Smrg /* if no more bits set, then done */
1079e25fdb51Smrg if (len2 == 0)
1080e25fdb51Smrg break;
1081e25fdb51Smrg
1082e25fdb51Smrg /* another iteration of the loop with odd and even swapped */
1083e25fdb51Smrg gf2_matrix_square(odd, even);
1084e25fdb51Smrg if (len2 & 1)
1085e25fdb51Smrg crc1 = gf2_matrix_times(odd, crc1);
1086e25fdb51Smrg len2 >>= 1;
1087e25fdb51Smrg
1088e25fdb51Smrg /* if no more bits set, then done */
1089e25fdb51Smrg } while (len2 != 0);
1090e25fdb51Smrg
1091e25fdb51Smrg /* return combined crc */
1092e25fdb51Smrg crc1 ^= crc2;
1093e25fdb51Smrg return crc1;
1094e25fdb51Smrg }
1095e25fdb51Smrg
#define BASE 65521U     /* largest prime smaller than 65536 */
#define LOW16 0xffff    /* mask lower 16 bits */

/* Combine two adler-32's: given adler1 of a first sequence of bytes and
   adler2 of a second sequence that is len2 bytes long, return the adler-32 of
   the two sequences concatenated.  Each adler-32 carries two 16-bit sums
   modulo BASE, the low half and the high half, which are combined
   separately. */
local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2,
                                 size_t len2)
{
    unsigned long lo1 = adler1 & LOW16;
    unsigned long hi1 = (adler1 >> 16) & LOW16;
    unsigned long lo2 = adler2 & LOW16;
    unsigned long hi2 = (adler2 >> 16) & LOW16;
    unsigned rem = (unsigned)(len2 % BASE);
    unsigned long lo, hi;
    int pass;

    /* form both sums with a multiple of BASE added in, so that the
       subtractions (of 1 and of rem) can not take them below zero */
    lo = lo1 + lo2 + BASE - 1;
    hi = (rem * lo1) % BASE + hi1 + hi2 + BASE - rem;

    /* bring the sums back into range with conditional subtractions: up to
       two times BASE off the low sum, up to three times BASE off the high */
    for (pass = 0; pass < 2; pass++)
        if (lo >= BASE)
            lo -= BASE;
    if (hi >= (BASE << 1))
        hi -= (BASE << 1);
    if (hi >= BASE)
        hi -= BASE;
    return lo | (hi << 16);
}
1118e25fdb51Smrg
1119e25fdb51Smrg /* -- pool of spaces for buffer management -- */
1120e25fdb51Smrg
1121e25fdb51Smrg /* These routines manage a pool of spaces. Each pool specifies a fixed size
1122e25fdb51Smrg buffer to be contained in each space. Each space has a use count, which
1123e25fdb51Smrg when decremented to zero returns the space to the pool. If a space is
1124e25fdb51Smrg requested from the pool and the pool is empty, a space is immediately
1125e25fdb51Smrg created unless a specified limit on the number of spaces has been reached.
1126e25fdb51Smrg Only if the limit is reached will it wait for a space to be returned to the
1127e25fdb51Smrg pool. Each space knows what pool it belongs to, so that it can be returned.
1128e25fdb51Smrg */
1129e25fdb51Smrg
/* a space (one buffer for each space) -- handed out by get_space() with a use
   count of one, and put back on its pool's list by drop_space() when the last
   user drops it */
struct space {
    lock *use;              /* use count -- return to pool when zero */
    unsigned char *buf;     /* buffer of size size */
    size_t size;            /* current size of this buffer */
    size_t len;             /* for application usage (initially zero) */
    struct pool *pool;      /* pool to return to */
    struct space *next;     /* for pool linked list */
};
1139e25fdb51Smrg
/* pool of spaces (one pool for each type needed) -- set up by new_pool() and
   torn down by free_pool() */
struct pool {
    lock *have;             /* unused spaces available, lock for list */
    struct space *head;     /* linked list of available buffers */
    size_t size;            /* size of new buffers in this pool */
    int limit;              /* number of new spaces allowed, or -1 */
    int made;               /* number of buffers made */
};
1148e25fdb51Smrg
1149e25fdb51Smrg /* initialize a pool (pool structure itself provided, not allocated) -- the
1150e25fdb51Smrg limit is the maximum number of spaces in the pool, or -1 to indicate no
1151e25fdb51Smrg limit, i.e., to never wait for a buffer to return to the pool */
new_pool(struct pool * pool,size_t size,int limit)1152e25fdb51Smrg local void new_pool(struct pool *pool, size_t size, int limit)
1153e25fdb51Smrg {
1154e25fdb51Smrg pool->have = new_lock(0);
1155e25fdb51Smrg pool->head = NULL;
1156e25fdb51Smrg pool->size = size;
1157e25fdb51Smrg pool->limit = limit;
1158e25fdb51Smrg pool->made = 0;
1159e25fdb51Smrg }
1160e25fdb51Smrg
1161e25fdb51Smrg /* get a space from a pool -- the use count is initially set to one, so there
1162e25fdb51Smrg is no need to call use_space() for the first use */
get_space(struct pool * pool)1163e25fdb51Smrg local struct space *get_space(struct pool *pool)
1164e25fdb51Smrg {
1165e25fdb51Smrg struct space *space;
1166e25fdb51Smrg
1167e25fdb51Smrg /* if can't create any more, wait for a space to show up */
1168e25fdb51Smrg possess(pool->have);
1169e25fdb51Smrg if (pool->limit == 0)
1170e25fdb51Smrg wait_for(pool->have, NOT_TO_BE, 0);
1171e25fdb51Smrg
1172e25fdb51Smrg /* if a space is available, pull it from the list and return it */
1173e25fdb51Smrg if (pool->head != NULL) {
1174e25fdb51Smrg space = pool->head;
1175e25fdb51Smrg possess(space->use);
1176e25fdb51Smrg pool->head = space->next;
1177e25fdb51Smrg twist(pool->have, BY, -1); /* one less in pool */
1178e25fdb51Smrg twist(space->use, TO, 1); /* initially one user */
1179cf2fd8adStls space->len = 0;
1180e25fdb51Smrg return space;
1181e25fdb51Smrg }
1182e25fdb51Smrg
1183e25fdb51Smrg /* nothing available, don't want to wait, make a new space */
1184e25fdb51Smrg assert(pool->limit != 0);
1185e25fdb51Smrg if (pool->limit > 0)
1186e25fdb51Smrg pool->limit--;
1187e25fdb51Smrg pool->made++;
1188e25fdb51Smrg release(pool->have);
1189cf2fd8adStls space = MALLOC(sizeof(struct space));
1190e25fdb51Smrg if (space == NULL)
1191e25fdb51Smrg bail("not enough memory", "");
1192e25fdb51Smrg space->use = new_lock(1); /* initially one user */
1193cf2fd8adStls space->buf = MALLOC(pool->size);
1194e25fdb51Smrg if (space->buf == NULL)
1195e25fdb51Smrg bail("not enough memory", "");
1196cf2fd8adStls space->size = pool->size;
1197cf2fd8adStls space->len = 0;
1198e25fdb51Smrg space->pool = pool; /* remember the pool this belongs to */
1199e25fdb51Smrg return space;
1200e25fdb51Smrg }
1201e25fdb51Smrg
/* Compute the next size up from size by multiplying by about 2**(1/3): add a
   quarter, and if the three most significant bits of the result are all set,
   round up to the next power of two (so three applications result in a
   doubling).  The result is at least 16.  If the computation overflows, or
   otherwise fails to give something larger than size, the maximum size_t
   value is returned. */
local size_t grow(size_t size)
{
    size_t was, top;
    int shift;

    was = size;
    size += size >> 2;

    /* reduce to the top three bits, counting how many bits were removed */
    top = size;
    for (shift = 0; top > 7; shift++)
        top >>= 1;

    /* round up to the next power of two, 2**(shift + 3) -- the shift is done
       in two steps, each less than the width of size_t, so that when that
       power of two does not fit the result is a well-defined zero (caught
       below) instead of an undefined full-width shift */
    if (top == 7)
        size = ((size_t)1 << shift) << 3;
    if (size < 16)
        size = 16;

    /* not larger than before means the arithmetic wrapped: saturate */
    if (size <= was)
        size = (size_t)0 - 1;
    return size;
}
1223cf2fd8adStls
1224cf2fd8adStls /* increase the size of the buffer in space */
grow_space(struct space * space)1225cf2fd8adStls local void grow_space(struct space *space)
1226cf2fd8adStls {
1227cf2fd8adStls size_t more;
1228cf2fd8adStls
1229cf2fd8adStls /* compute next size up */
1230cf2fd8adStls more = grow(space->size);
1231cf2fd8adStls if (more == space->size)
1232cf2fd8adStls bail("not enough memory", "");
1233cf2fd8adStls
1234cf2fd8adStls /* reallocate the buffer */
1235cf2fd8adStls space->buf = REALLOC(space->buf, more);
1236cf2fd8adStls if (space->buf == NULL)
1237cf2fd8adStls bail("not enough memory", "");
1238cf2fd8adStls space->size = more;
1239cf2fd8adStls }
1240cf2fd8adStls
1241e25fdb51Smrg /* increment the use count to require one more drop before returning this space
1242e25fdb51Smrg to the pool */
use_space(struct space * space)1243e25fdb51Smrg local void use_space(struct space *space)
1244e25fdb51Smrg {
1245e25fdb51Smrg possess(space->use);
1246e25fdb51Smrg twist(space->use, BY, +1);
1247e25fdb51Smrg }
1248e25fdb51Smrg
1249e25fdb51Smrg /* drop a space, returning it to the pool if the use count is zero */
drop_space(struct space * space)1250e25fdb51Smrg local void drop_space(struct space *space)
1251e25fdb51Smrg {
1252e25fdb51Smrg int use;
1253e25fdb51Smrg struct pool *pool;
1254e25fdb51Smrg
1255e25fdb51Smrg possess(space->use);
1256e25fdb51Smrg use = peek_lock(space->use);
1257e25fdb51Smrg assert(use != 0);
1258e25fdb51Smrg if (use == 1) {
1259e25fdb51Smrg pool = space->pool;
1260e25fdb51Smrg possess(pool->have);
1261e25fdb51Smrg space->next = pool->head;
1262e25fdb51Smrg pool->head = space;
1263e25fdb51Smrg twist(pool->have, BY, +1);
1264e25fdb51Smrg }
1265e25fdb51Smrg twist(space->use, BY, -1);
1266e25fdb51Smrg }
1267e25fdb51Smrg
1268e25fdb51Smrg /* free the memory and lock resources of a pool -- return number of spaces for
1269e25fdb51Smrg debugging and resource usage measurement */
free_pool(struct pool * pool)1270e25fdb51Smrg local int free_pool(struct pool *pool)
1271e25fdb51Smrg {
1272e25fdb51Smrg int count;
1273e25fdb51Smrg struct space *space;
1274e25fdb51Smrg
1275e25fdb51Smrg possess(pool->have);
1276e25fdb51Smrg count = 0;
1277e25fdb51Smrg while ((space = pool->head) != NULL) {
1278e25fdb51Smrg pool->head = space->next;
1279cf2fd8adStls FREE(space->buf);
1280e25fdb51Smrg free_lock(space->use);
1281cf2fd8adStls FREE(space);
1282e25fdb51Smrg count++;
1283e25fdb51Smrg }
1284cf2fd8adStls assert(count == pool->made);
1285e25fdb51Smrg release(pool->have);
1286e25fdb51Smrg free_lock(pool->have);
1287e25fdb51Smrg return count;
1288e25fdb51Smrg }
1289e25fdb51Smrg
1290e25fdb51Smrg /* input and output buffer pools */
1291e25fdb51Smrg local struct pool in_pool;
1292e25fdb51Smrg local struct pool out_pool;
1293cf2fd8adStls local struct pool dict_pool;
1294cf2fd8adStls local struct pool lens_pool;
1295e25fdb51Smrg
1296e25fdb51Smrg /* -- parallel compression -- */
1297e25fdb51Smrg
1298e25fdb51Smrg /* compress or write job (passed from compress list to write list) -- if seq is
1299e25fdb51Smrg equal to -1, compress_thread is instructed to return; if more is false then
1300e25fdb51Smrg this is the last chunk, which after writing tells write_thread to return */
1301e25fdb51Smrg struct job {
1302e25fdb51Smrg long seq; /* sequence number */
1303e25fdb51Smrg int more; /* true if this is not the last chunk */
1304e25fdb51Smrg struct space *in; /* input data to compress */
1305e25fdb51Smrg struct space *out; /* dictionary or resulting compressed data */
1306cf2fd8adStls struct space *lens; /* coded list of flush block lengths */
1307e25fdb51Smrg unsigned long check; /* check value for input data */
1308e25fdb51Smrg lock *calc; /* released when check calculation complete */
1309e25fdb51Smrg struct job *next; /* next job in the list (either list) */
1310e25fdb51Smrg };
1311e25fdb51Smrg
1312e25fdb51Smrg /* list of compress jobs (with tail for appending to list) */
1313e25fdb51Smrg local lock *compress_have = NULL; /* number of compress jobs waiting */
1314e25fdb51Smrg local struct job *compress_head, **compress_tail;
1315e25fdb51Smrg
1316e25fdb51Smrg /* list of write jobs */
1317e25fdb51Smrg local lock *write_first; /* lowest sequence number in list */
1318e25fdb51Smrg local struct job *write_head;
1319e25fdb51Smrg
1320e25fdb51Smrg /* number of compression threads running */
1321e25fdb51Smrg local int cthreads = 0;
1322e25fdb51Smrg
1323e25fdb51Smrg /* write thread if running */
1324e25fdb51Smrg local thread *writeth = NULL;
1325e25fdb51Smrg
1326e25fdb51Smrg /* setup job lists (call from main thread) */
setup_jobs(void)1327e25fdb51Smrg local void setup_jobs(void)
1328e25fdb51Smrg {
1329e25fdb51Smrg /* set up only if not already set up*/
1330e25fdb51Smrg if (compress_have != NULL)
1331e25fdb51Smrg return;
1332e25fdb51Smrg
1333e25fdb51Smrg /* allocate locks and initialize lists */
1334e25fdb51Smrg compress_have = new_lock(0);
1335e25fdb51Smrg compress_head = NULL;
1336e25fdb51Smrg compress_tail = &compress_head;
1337e25fdb51Smrg write_first = new_lock(-1);
1338e25fdb51Smrg write_head = NULL;
1339e25fdb51Smrg
1340cf2fd8adStls /* initialize buffer pools (initial size for out_pool not critical, since
1341cf2fd8adStls buffers will be grown in size if needed -- initial size chosen to make
1342cf2fd8adStls this unlikely -- same for lens_pool) */
1343cf2fd8adStls new_pool(&in_pool, g.block, INBUFS(g.procs));
1344cf2fd8adStls new_pool(&out_pool, OUTPOOL(g.block), -1);
1345cf2fd8adStls new_pool(&dict_pool, DICT, -1);
1346cf2fd8adStls new_pool(&lens_pool, g.block >> (RSYNCBITS - 1), -1);
1347e25fdb51Smrg }
1348e25fdb51Smrg
1349e25fdb51Smrg /* command the compress threads to all return, then join them all (call from
1350e25fdb51Smrg main thread), free all the thread-related resources */
finish_jobs(void)1351e25fdb51Smrg local void finish_jobs(void)
1352e25fdb51Smrg {
1353e25fdb51Smrg struct job job;
1354e25fdb51Smrg int caught;
1355e25fdb51Smrg
1356e25fdb51Smrg /* only do this once */
1357e25fdb51Smrg if (compress_have == NULL)
1358e25fdb51Smrg return;
1359e25fdb51Smrg
1360e25fdb51Smrg /* command all of the extant compress threads to return */
1361e25fdb51Smrg possess(compress_have);
1362e25fdb51Smrg job.seq = -1;
1363e25fdb51Smrg job.next = NULL;
1364e25fdb51Smrg compress_head = &job;
1365e25fdb51Smrg compress_tail = &(job.next);
1366e25fdb51Smrg twist(compress_have, BY, +1); /* will wake them all up */
1367e25fdb51Smrg
1368e25fdb51Smrg /* join all of the compress threads, verify they all came back */
1369e25fdb51Smrg caught = join_all();
1370e25fdb51Smrg Trace(("-- joined %d compress threads", caught));
1371e25fdb51Smrg assert(caught == cthreads);
1372e25fdb51Smrg cthreads = 0;
1373e25fdb51Smrg
1374e25fdb51Smrg /* free the resources */
1375cf2fd8adStls caught = free_pool(&lens_pool);
1376cf2fd8adStls Trace(("-- freed %d block lengths buffers", caught));
1377cf2fd8adStls caught = free_pool(&dict_pool);
1378cf2fd8adStls Trace(("-- freed %d dictionary buffers", caught));
1379e25fdb51Smrg caught = free_pool(&out_pool);
1380e25fdb51Smrg Trace(("-- freed %d output buffers", caught));
1381e25fdb51Smrg caught = free_pool(&in_pool);
1382e25fdb51Smrg Trace(("-- freed %d input buffers", caught));
1383e25fdb51Smrg free_lock(write_first);
1384e25fdb51Smrg free_lock(compress_have);
1385e25fdb51Smrg compress_have = NULL;
1386e25fdb51Smrg }
1387e25fdb51Smrg
1388cf2fd8adStls /* compress all strm->avail_in bytes at strm->next_in to out->buf, updating
1389cf2fd8adStls out->len, grow the size of the buffer (out->size) if necessary -- respect
1390cf2fd8adStls the size limitations of the zlib stream data types (size_t may be larger
1391cf2fd8adStls than unsigned) */
deflate_engine(z_stream * strm,struct space * out,int flush)1392cf2fd8adStls local void deflate_engine(z_stream *strm, struct space *out, int flush)
1393cf2fd8adStls {
1394cf2fd8adStls size_t room;
1395cf2fd8adStls
1396cf2fd8adStls do {
1397cf2fd8adStls room = out->size - out->len;
1398cf2fd8adStls if (room == 0) {
1399cf2fd8adStls grow_space(out);
1400cf2fd8adStls room = out->size - out->len;
1401cf2fd8adStls }
1402cf2fd8adStls strm->next_out = out->buf + out->len;
1403cf2fd8adStls strm->avail_out = room < UINT_MAX ? (unsigned)room : UINT_MAX;
1404cf2fd8adStls (void)deflate(strm, flush);
1405cf2fd8adStls out->len = strm->next_out - out->buf;
1406cf2fd8adStls } while (strm->avail_out == 0);
1407cf2fd8adStls assert(strm->avail_in == 0);
1408cf2fd8adStls }
1409cf2fd8adStls
1410e25fdb51Smrg /* get the next compression job from the head of the list, compress and compute
1411e25fdb51Smrg the check value on the input, and put a job in the write list with the
1412e25fdb51Smrg results -- keep looking for more jobs, returning when a job is found with a
1413e25fdb51Smrg sequence number of -1 (leave that job in the list for other incarnations to
1414e25fdb51Smrg find) */
compress_thread(void * dummy)1415e25fdb51Smrg local void compress_thread(void *dummy)
1416e25fdb51Smrg {
1417e25fdb51Smrg struct job *job; /* job pulled and working on */
1418e25fdb51Smrg struct job *here, **prior; /* pointers for inserting in write list */
1419e25fdb51Smrg unsigned long check; /* check value of input */
1420cf2fd8adStls unsigned char *next; /* pointer for blocks, check value data */
1421cf2fd8adStls size_t left; /* input left to process */
1422e25fdb51Smrg size_t len; /* remaining bytes to compress/check */
1423cf2fd8adStls #if ZLIB_VERNUM >= 0x1260
1424cf2fd8adStls int bits; /* deflate pending bits */
1425cf2fd8adStls #endif
1426cf2fd8adStls struct space *temp; /* temporary space for zopfli input */
1427e25fdb51Smrg z_stream strm; /* deflate stream */
1428e25fdb51Smrg
1429e25fdb51Smrg (void)dummy;
1430e25fdb51Smrg
1431e25fdb51Smrg /* initialize the deflate stream for this thread */
1432cf2fd8adStls strm.zfree = ZFREE;
1433cf2fd8adStls strm.zalloc = ZALLOC;
1434cf2fd8adStls strm.opaque = OPAQUE;
1435cf2fd8adStls if (deflateInit2(&strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK)
1436e25fdb51Smrg bail("not enough memory", "");
1437e25fdb51Smrg
1438e25fdb51Smrg /* keep looking for work */
1439e25fdb51Smrg for (;;) {
1440e25fdb51Smrg /* get a job (like I tell my son) */
1441e25fdb51Smrg possess(compress_have);
1442e25fdb51Smrg wait_for(compress_have, NOT_TO_BE, 0);
1443e25fdb51Smrg job = compress_head;
1444e25fdb51Smrg assert(job != NULL);
1445e25fdb51Smrg if (job->seq == -1)
1446e25fdb51Smrg break;
1447e25fdb51Smrg compress_head = job->next;
1448e25fdb51Smrg if (job->next == NULL)
1449e25fdb51Smrg compress_tail = &compress_head;
1450e25fdb51Smrg twist(compress_have, BY, -1);
1451e25fdb51Smrg
1452e25fdb51Smrg /* got a job -- initialize and set the compression level (note that if
1453e25fdb51Smrg deflateParams() is called immediately after deflateReset(), there is
1454e25fdb51Smrg no need to initialize the input/output for the stream) */
1455e25fdb51Smrg Trace(("-- compressing #%ld", job->seq));
1456cf2fd8adStls if (g.level <= 9) {
1457e25fdb51Smrg (void)deflateReset(&strm);
1458cf2fd8adStls (void)deflateParams(&strm, g.level, Z_DEFAULT_STRATEGY);
1459cf2fd8adStls }
1460cf2fd8adStls else {
1461cf2fd8adStls temp = get_space(&out_pool);
1462cf2fd8adStls temp->len = 0;
1463cf2fd8adStls }
1464e25fdb51Smrg
1465cf2fd8adStls /* set dictionary if provided, release that input or dictionary buffer
1466cf2fd8adStls (not NULL if g.setdict is true and if this is not the first work
1467cf2fd8adStls unit) */
1468e25fdb51Smrg if (job->out != NULL) {
1469e25fdb51Smrg len = job->out->len;
1470cf2fd8adStls left = len < DICT ? len : DICT;
1471cf2fd8adStls if (g.level <= 9)
1472cf2fd8adStls deflateSetDictionary(&strm, job->out->buf + (len - left),
1473cf2fd8adStls left);
1474cf2fd8adStls else {
1475cf2fd8adStls memcpy(temp->buf, job->out->buf + (len - left), left);
1476cf2fd8adStls temp->len = left;
1477cf2fd8adStls }
1478e25fdb51Smrg drop_space(job->out);
1479e25fdb51Smrg }
1480e25fdb51Smrg
1481cf2fd8adStls /* set up input and output */
1482e25fdb51Smrg job->out = get_space(&out_pool);
1483cf2fd8adStls if (g.level <= 9) {
1484e25fdb51Smrg strm.next_in = job->in->buf;
1485e25fdb51Smrg strm.next_out = job->out->buf;
1486cf2fd8adStls }
1487cf2fd8adStls else
1488cf2fd8adStls memcpy(temp->buf + temp->len, job->in->buf, job->in->len);
1489e25fdb51Smrg
1490cf2fd8adStls /* compress each block, either flushing or finishing */
1491cf2fd8adStls next = job->lens == NULL ? NULL : job->lens->buf;
1492cf2fd8adStls left = job->in->len;
1493cf2fd8adStls job->out->len = 0;
1494cf2fd8adStls do {
1495cf2fd8adStls /* decode next block length from blocks list */
1496cf2fd8adStls len = next == NULL ? 128 : *next++;
1497cf2fd8adStls if (len < 128) /* 64..32831 */
1498cf2fd8adStls len = (len << 8) + (*next++) + 64;
1499cf2fd8adStls else if (len == 128) /* end of list */
1500cf2fd8adStls len = left;
1501cf2fd8adStls else if (len < 192) /* 1..63 */
1502cf2fd8adStls len &= 0x3f;
1503cf2fd8adStls else { /* 32832..4227135 */
1504cf2fd8adStls len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U;
1505cf2fd8adStls len += *next++;
1506cf2fd8adStls }
1507cf2fd8adStls left -= len;
1508cf2fd8adStls
1509cf2fd8adStls if (g.level <= 9) {
1510cf2fd8adStls /* run MAXP2-sized amounts of input through deflate -- this
1511cf2fd8adStls loop is needed for those cases where the unsigned type is
1512cf2fd8adStls smaller than the size_t type, or when len is close to the
1513cf2fd8adStls limit of the size_t type */
1514cf2fd8adStls while (len > MAXP2) {
1515cf2fd8adStls strm.avail_in = MAXP2;
1516cf2fd8adStls deflate_engine(&strm, job->out, Z_NO_FLUSH);
1517cf2fd8adStls len -= MAXP2;
1518e25fdb51Smrg }
1519e25fdb51Smrg
1520cf2fd8adStls /* run the last piece through deflate -- end on a byte
1521cf2fd8adStls boundary, using a sync marker if necessary, or finish the
1522cf2fd8adStls deflate stream if this is the last block */
1523e25fdb51Smrg strm.avail_in = (unsigned)len;
1524cf2fd8adStls if (left || job->more) {
1525cf2fd8adStls #if ZLIB_VERNUM >= 0x1260
1526cf2fd8adStls deflate_engine(&strm, job->out, Z_BLOCK);
1527cf2fd8adStls
1528cf2fd8adStls /* add enough empty blocks to get to a byte boundary */
1529cf2fd8adStls (void)deflatePending(&strm, Z_NULL, &bits);
1530cf2fd8adStls if (bits & 1)
1531cf2fd8adStls deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
1532cf2fd8adStls else if (bits & 7) {
1533cf2fd8adStls do { /* add static empty blocks */
1534cf2fd8adStls bits = deflatePrime(&strm, 10, 2);
1535cf2fd8adStls assert(bits == Z_OK);
1536cf2fd8adStls (void)deflatePending(&strm, Z_NULL, &bits);
1537cf2fd8adStls } while (bits & 7);
1538cf2fd8adStls deflate_engine(&strm, job->out, Z_BLOCK);
1539cf2fd8adStls }
1540cf2fd8adStls #else
1541cf2fd8adStls deflate_engine(&strm, job->out, Z_SYNC_FLUSH);
1542cf2fd8adStls #endif
1543cf2fd8adStls }
1544cf2fd8adStls else
1545cf2fd8adStls deflate_engine(&strm, job->out, Z_FINISH);
1546cf2fd8adStls }
1547cf2fd8adStls else {
1548cf2fd8adStls /* compress len bytes using zopfli, bring to byte boundary */
1549cf2fd8adStls unsigned char bits, *out;
1550cf2fd8adStls size_t outsize;
1551cf2fd8adStls
1552cf2fd8adStls out = NULL;
1553cf2fd8adStls outsize = 0;
1554cf2fd8adStls bits = 0;
1555cf2fd8adStls ZopfliDeflatePart(&g.zopts, 2, !(left || job->more),
1556cf2fd8adStls temp->buf, temp->len, temp->len + len,
1557cf2fd8adStls &bits, &out, &outsize);
1558cf2fd8adStls assert(job->out->len + outsize + 5 <= job->out->size);
1559cf2fd8adStls memcpy(job->out->buf + job->out->len, out, outsize);
1560cf2fd8adStls free(out);
1561cf2fd8adStls job->out->len += outsize;
1562cf2fd8adStls if (left || job->more) {
1563cf2fd8adStls bits &= 7;
1564cf2fd8adStls if (bits & 1) {
1565cf2fd8adStls if (bits == 7)
1566cf2fd8adStls job->out->buf[job->out->len++] = 0;
1567cf2fd8adStls job->out->buf[job->out->len++] = 0;
1568cf2fd8adStls job->out->buf[job->out->len++] = 0;
1569cf2fd8adStls job->out->buf[job->out->len++] = 0xff;
1570cf2fd8adStls job->out->buf[job->out->len++] = 0xff;
1571cf2fd8adStls }
1572cf2fd8adStls else if (bits) {
1573cf2fd8adStls do {
1574cf2fd8adStls job->out->buf[job->out->len - 1] += 2 << bits;
1575cf2fd8adStls job->out->buf[job->out->len++] = 0;
1576cf2fd8adStls bits += 2;
1577cf2fd8adStls } while (bits < 8);
1578cf2fd8adStls }
1579cf2fd8adStls }
1580cf2fd8adStls temp->len += len;
1581cf2fd8adStls }
1582cf2fd8adStls } while (left);
1583cf2fd8adStls if (g.level > 9)
1584cf2fd8adStls drop_space(temp);
1585cf2fd8adStls if (job->lens != NULL) {
1586cf2fd8adStls drop_space(job->lens);
1587cf2fd8adStls job->lens = NULL;
1588cf2fd8adStls }
1589e25fdb51Smrg Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)"));
1590e25fdb51Smrg
1591e25fdb51Smrg /* reserve input buffer until check value has been calculated */
1592e25fdb51Smrg use_space(job->in);
1593e25fdb51Smrg
1594e25fdb51Smrg /* insert write job in list in sorted order, alert write thread */
1595e25fdb51Smrg possess(write_first);
1596e25fdb51Smrg prior = &write_head;
1597e25fdb51Smrg while ((here = *prior) != NULL) {
1598e25fdb51Smrg if (here->seq > job->seq)
1599e25fdb51Smrg break;
1600e25fdb51Smrg prior = &(here->next);
1601e25fdb51Smrg }
1602e25fdb51Smrg job->next = here;
1603e25fdb51Smrg *prior = job;
1604e25fdb51Smrg twist(write_first, TO, write_head->seq);
1605e25fdb51Smrg
1606e25fdb51Smrg /* calculate the check value in parallel with writing, alert the write
1607e25fdb51Smrg thread that the calculation is complete, and drop this usage of the
1608e25fdb51Smrg input buffer */
1609e25fdb51Smrg len = job->in->len;
1610e25fdb51Smrg next = job->in->buf;
1611e25fdb51Smrg check = CHECK(0L, Z_NULL, 0);
1612cf2fd8adStls while (len > MAXP2) {
1613cf2fd8adStls check = CHECK(check, next, MAXP2);
1614cf2fd8adStls len -= MAXP2;
1615cf2fd8adStls next += MAXP2;
1616e25fdb51Smrg }
1617e25fdb51Smrg check = CHECK(check, next, (unsigned)len);
1618e25fdb51Smrg drop_space(job->in);
1619e25fdb51Smrg job->check = check;
1620cf2fd8adStls Trace(("-- checked #%ld%s", job->seq, job->more ? "" : " (last)"));
1621e25fdb51Smrg possess(job->calc);
1622e25fdb51Smrg twist(job->calc, TO, 1);
1623e25fdb51Smrg
1624e25fdb51Smrg /* done with that one -- go find another job */
1625e25fdb51Smrg }
1626e25fdb51Smrg
1627e25fdb51Smrg /* found job with seq == -1 -- free deflate memory and return to join */
1628e25fdb51Smrg release(compress_have);
1629cf2fd8adStls (void)deflateEnd(&strm);
1630e25fdb51Smrg }
1631e25fdb51Smrg
1632e25fdb51Smrg /* collect the write jobs off of the list in sequence order and write out the
1633e25fdb51Smrg compressed data until the last chunk is written -- also write the header and
1634e25fdb51Smrg trailer and combine the individual check values of the input buffers */
write_thread(void * dummy)1635e25fdb51Smrg local void write_thread(void *dummy)
1636e25fdb51Smrg {
1637e25fdb51Smrg long seq; /* next sequence number looking for */
1638e25fdb51Smrg struct job *job; /* job pulled and working on */
1639e25fdb51Smrg size_t len; /* input length */
1640e25fdb51Smrg int more; /* true if more chunks to write */
1641e25fdb51Smrg unsigned long head; /* header length */
1642e25fdb51Smrg unsigned long ulen; /* total uncompressed size (overflow ok) */
1643e25fdb51Smrg unsigned long clen; /* total compressed size (overflow ok) */
1644e25fdb51Smrg unsigned long check; /* check value of uncompressed data */
1645e25fdb51Smrg
1646e25fdb51Smrg (void)dummy;
1647e25fdb51Smrg
1648e25fdb51Smrg /* build and write header */
1649e25fdb51Smrg Trace(("-- write thread running"));
1650e25fdb51Smrg head = put_header();
1651e25fdb51Smrg
1652e25fdb51Smrg /* process output of compress threads until end of input */
1653e25fdb51Smrg ulen = clen = 0;
1654e25fdb51Smrg check = CHECK(0L, Z_NULL, 0);
1655e25fdb51Smrg seq = 0;
1656e25fdb51Smrg do {
1657e25fdb51Smrg /* get next write job in order */
1658e25fdb51Smrg possess(write_first);
1659e25fdb51Smrg wait_for(write_first, TO_BE, seq);
1660e25fdb51Smrg job = write_head;
1661e25fdb51Smrg write_head = job->next;
1662e25fdb51Smrg twist(write_first, TO, write_head == NULL ? -1 : write_head->seq);
1663e25fdb51Smrg
1664e25fdb51Smrg /* update lengths, save uncompressed length for COMB */
1665e25fdb51Smrg more = job->more;
1666e25fdb51Smrg len = job->in->len;
1667e25fdb51Smrg drop_space(job->in);
1668e25fdb51Smrg ulen += (unsigned long)len;
1669e25fdb51Smrg clen += (unsigned long)(job->out->len);
1670e25fdb51Smrg
1671e25fdb51Smrg /* write the compressed data and drop the output buffer */
1672e25fdb51Smrg Trace(("-- writing #%ld", seq));
1673cf2fd8adStls writen(g.outd, job->out->buf, job->out->len);
1674e25fdb51Smrg drop_space(job->out);
1675e25fdb51Smrg Trace(("-- wrote #%ld%s", seq, more ? "" : " (last)"));
1676e25fdb51Smrg
1677e25fdb51Smrg /* wait for check calculation to complete, then combine, once
1678e25fdb51Smrg the compress thread is done with the input, release it */
1679e25fdb51Smrg possess(job->calc);
1680e25fdb51Smrg wait_for(job->calc, TO_BE, 1);
1681e25fdb51Smrg release(job->calc);
1682e25fdb51Smrg check = COMB(check, job->check, len);
1683e25fdb51Smrg
1684e25fdb51Smrg /* free the job */
1685e25fdb51Smrg free_lock(job->calc);
1686cf2fd8adStls FREE(job);
1687e25fdb51Smrg
1688e25fdb51Smrg /* get the next buffer in sequence */
1689e25fdb51Smrg seq++;
1690e25fdb51Smrg } while (more);
1691e25fdb51Smrg
1692e25fdb51Smrg /* write trailer */
1693e25fdb51Smrg put_trailer(ulen, clen, check, head);
1694e25fdb51Smrg
1695e25fdb51Smrg /* verify no more jobs, prepare for next use */
1696e25fdb51Smrg possess(compress_have);
1697e25fdb51Smrg assert(compress_head == NULL && peek_lock(compress_have) == 0);
1698e25fdb51Smrg release(compress_have);
1699e25fdb51Smrg possess(write_first);
1700e25fdb51Smrg assert(write_head == NULL);
1701e25fdb51Smrg twist(write_first, TO, -1);
1702e25fdb51Smrg }
1703e25fdb51Smrg
1704cf2fd8adStls /* encode a hash hit to the block lengths list -- hit == 0 ends the list */
append_len(struct job * job,size_t len)1705cf2fd8adStls local void append_len(struct job *job, size_t len)
1706cf2fd8adStls {
1707cf2fd8adStls struct space *lens;
1708cf2fd8adStls
1709cf2fd8adStls assert(len < 4227136UL);
1710cf2fd8adStls if (job->lens == NULL)
1711cf2fd8adStls job->lens = get_space(&lens_pool);
1712cf2fd8adStls lens = job->lens;
1713cf2fd8adStls if (lens->size < lens->len + 3)
1714cf2fd8adStls grow_space(lens);
1715cf2fd8adStls if (len < 64)
1716cf2fd8adStls lens->buf[lens->len++] = len + 128;
1717cf2fd8adStls else if (len < 32832U) {
1718cf2fd8adStls len -= 64;
1719cf2fd8adStls lens->buf[lens->len++] = len >> 8;
1720cf2fd8adStls lens->buf[lens->len++] = len;
1721cf2fd8adStls }
1722cf2fd8adStls else {
1723cf2fd8adStls len -= 32832U;
1724cf2fd8adStls lens->buf[lens->len++] = (len >> 16) + 192;
1725cf2fd8adStls lens->buf[lens->len++] = len >> 8;
1726cf2fd8adStls lens->buf[lens->len++] = len;
1727cf2fd8adStls }
1728cf2fd8adStls }
1729cf2fd8adStls
1730e25fdb51Smrg /* compress ind to outd, using multiple threads for the compression and check
1731e25fdb51Smrg value calculations and one other thread for writing the output -- compress
1732e25fdb51Smrg threads will be launched and left running (waiting actually) to support
1733e25fdb51Smrg subsequent calls of parallel_compress() */
parallel_compress(void)1734e25fdb51Smrg local void parallel_compress(void)
1735e25fdb51Smrg {
1736e25fdb51Smrg long seq; /* sequence number */
1737cf2fd8adStls struct space *curr; /* input data to compress */
1738cf2fd8adStls struct space *next; /* input data that follows curr */
1739cf2fd8adStls struct space *hold; /* input data that follows next */
1740cf2fd8adStls struct space *dict; /* dictionary for next compression */
1741e25fdb51Smrg struct job *job; /* job for compress, then write */
1742e25fdb51Smrg int more; /* true if more input to read */
1743cf2fd8adStls unsigned hash; /* hash for rsyncable */
1744cf2fd8adStls unsigned char *scan; /* next byte to compute hash on */
1745cf2fd8adStls unsigned char *end; /* after end of data to compute hash on */
1746cf2fd8adStls unsigned char *last; /* position after last hit */
1747cf2fd8adStls size_t left; /* last hit in curr to end of curr */
1748cf2fd8adStls size_t len; /* for various length computations */
1749e25fdb51Smrg
1750e25fdb51Smrg /* if first time or after an option change, setup the job lists */
1751e25fdb51Smrg setup_jobs();
1752e25fdb51Smrg
1753e25fdb51Smrg /* start write thread */
1754e25fdb51Smrg writeth = launch(write_thread, NULL);
1755e25fdb51Smrg
1756e25fdb51Smrg /* read from input and start compress threads (write thread will pick up
1757e25fdb51Smrg the output of the compress threads) */
1758e25fdb51Smrg seq = 0;
1759e25fdb51Smrg next = get_space(&in_pool);
1760cf2fd8adStls next->len = readn(g.ind, next->buf, next->size);
1761cf2fd8adStls hold = NULL;
1762cf2fd8adStls dict = NULL;
1763cf2fd8adStls scan = next->buf;
1764cf2fd8adStls hash = RSYNCHIT;
1765cf2fd8adStls left = 0;
1766e25fdb51Smrg do {
1767cf2fd8adStls /* create a new job */
1768cf2fd8adStls job = MALLOC(sizeof(struct job));
1769e25fdb51Smrg if (job == NULL)
1770e25fdb51Smrg bail("not enough memory", "");
1771e25fdb51Smrg job->calc = new_lock(0);
1772e25fdb51Smrg
1773cf2fd8adStls /* update input spaces */
1774cf2fd8adStls curr = next;
1775cf2fd8adStls next = hold;
1776cf2fd8adStls hold = NULL;
1777cf2fd8adStls
1778cf2fd8adStls /* get more input if we don't already have some */
1779cf2fd8adStls if (next == NULL) {
1780e25fdb51Smrg next = get_space(&in_pool);
1781cf2fd8adStls next->len = readn(g.ind, next->buf, next->size);
1782cf2fd8adStls }
1783cf2fd8adStls
1784cf2fd8adStls /* if rsyncable, generate block lengths and prepare curr for job to
1785cf2fd8adStls likely have less than size bytes (up to the last hash hit) */
1786cf2fd8adStls job->lens = NULL;
1787cf2fd8adStls if (g.rsync && curr->len) {
1788cf2fd8adStls /* compute the hash function starting where we last left off to
1789cf2fd8adStls cover either size bytes or to EOF, whichever is less, through
1790cf2fd8adStls the data in curr (and in the next loop, through next) -- save
1791cf2fd8adStls the block lengths resulting from the hash hits in the job->lens
1792cf2fd8adStls list */
1793cf2fd8adStls if (left == 0) {
1794cf2fd8adStls /* scan is in curr */
1795cf2fd8adStls last = curr->buf;
1796cf2fd8adStls end = curr->buf + curr->len;
1797cf2fd8adStls while (scan < end) {
1798cf2fd8adStls hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
1799cf2fd8adStls if (hash == RSYNCHIT) {
1800cf2fd8adStls len = scan - last;
1801cf2fd8adStls append_len(job, len);
1802cf2fd8adStls last = scan;
1803cf2fd8adStls }
1804cf2fd8adStls }
1805cf2fd8adStls
1806cf2fd8adStls /* continue scan in next */
1807cf2fd8adStls left = scan - last;
1808cf2fd8adStls scan = next->buf;
1809cf2fd8adStls }
1810cf2fd8adStls
1811cf2fd8adStls /* scan in next for enough bytes to fill curr, or what is available
1812cf2fd8adStls in next, whichever is less (if next isn't full, then we're at
1813cf2fd8adStls the end of the file) -- the bytes in curr since the last hit,
1814cf2fd8adStls stored in left, counts towards the size of the first block */
1815cf2fd8adStls last = next->buf;
1816cf2fd8adStls len = curr->size - curr->len;
1817cf2fd8adStls if (len > next->len)
1818cf2fd8adStls len = next->len;
1819cf2fd8adStls end = next->buf + len;
1820cf2fd8adStls while (scan < end) {
1821cf2fd8adStls hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
1822cf2fd8adStls if (hash == RSYNCHIT) {
1823cf2fd8adStls len = (scan - last) + left;
1824cf2fd8adStls left = 0;
1825cf2fd8adStls append_len(job, len);
1826cf2fd8adStls last = scan;
1827cf2fd8adStls }
1828cf2fd8adStls }
1829cf2fd8adStls append_len(job, 0);
1830cf2fd8adStls
1831cf2fd8adStls /* create input in curr for job up to last hit or entire buffer if
1832cf2fd8adStls no hits at all -- save remainder in next and possibly hold */
1833cf2fd8adStls len = (job->lens->len == 1 ? scan : last) - next->buf;
1834cf2fd8adStls if (len) {
1835cf2fd8adStls /* got hits in next, or no hits in either -- copy to curr */
1836cf2fd8adStls memcpy(curr->buf + curr->len, next->buf, len);
1837cf2fd8adStls curr->len += len;
1838cf2fd8adStls memmove(next->buf, next->buf + len, next->len - len);
1839cf2fd8adStls next->len -= len;
1840cf2fd8adStls scan -= len;
1841cf2fd8adStls left = 0;
1842cf2fd8adStls }
1843cf2fd8adStls else if (job->lens->len != 1 && left && next->len) {
1844cf2fd8adStls /* had hits in curr, but none in next, and last hit in curr
1845cf2fd8adStls wasn't right at the end, so we have input there to save --
1846cf2fd8adStls use curr up to the last hit, save the rest, moving next to
1847cf2fd8adStls hold */
1848cf2fd8adStls hold = next;
1849cf2fd8adStls next = get_space(&in_pool);
1850cf2fd8adStls memcpy(next->buf, curr->buf + (curr->len - left), left);
1851cf2fd8adStls next->len = left;
1852cf2fd8adStls curr->len -= left;
1853cf2fd8adStls }
1854cf2fd8adStls else {
1855cf2fd8adStls /* else, last match happened to be right at the end of curr,
1856cf2fd8adStls or we're at the end of the input compressing the rest */
1857cf2fd8adStls left = 0;
1858cf2fd8adStls }
1859cf2fd8adStls }
1860cf2fd8adStls
1861cf2fd8adStls /* compress curr->buf to curr->len -- compress thread will drop curr */
1862cf2fd8adStls job->in = curr;
1863cf2fd8adStls
1864cf2fd8adStls /* set job->more if there is more to compress after curr */
1865e25fdb51Smrg more = next->len != 0;
1866e25fdb51Smrg job->more = more;
1867cf2fd8adStls
1868cf2fd8adStls /* provide dictionary for this job, prepare dictionary for next job */
1869cf2fd8adStls job->out = dict;
1870cf2fd8adStls if (more && g.setdict) {
1871cf2fd8adStls if (curr->len >= DICT || job->out == NULL) {
1872cf2fd8adStls dict = curr;
1873cf2fd8adStls use_space(dict);
1874cf2fd8adStls }
1875cf2fd8adStls else {
1876cf2fd8adStls dict = get_space(&dict_pool);
1877cf2fd8adStls len = DICT - curr->len;
1878cf2fd8adStls memcpy(dict->buf, job->out->buf + (job->out->len - len), len);
1879cf2fd8adStls memcpy(dict->buf + len, curr->buf, curr->len);
1880cf2fd8adStls dict->len = DICT;
1881cf2fd8adStls }
1882cf2fd8adStls }
1883cf2fd8adStls
1884cf2fd8adStls /* preparation of job is complete */
1885cf2fd8adStls job->seq = seq;
1886e25fdb51Smrg Trace(("-- read #%ld%s", seq, more ? "" : " (last)"));
1887e25fdb51Smrg if (++seq < 1)
1888cf2fd8adStls bail("input too long: ", g.inf);
1889e25fdb51Smrg
1890e25fdb51Smrg /* start another compress thread if needed */
1891cf2fd8adStls if (cthreads < seq && cthreads < g.procs) {
1892e25fdb51Smrg (void)launch(compress_thread, NULL);
1893e25fdb51Smrg cthreads++;
1894e25fdb51Smrg }
1895e25fdb51Smrg
1896e25fdb51Smrg /* put job at end of compress list, let all the compressors know */
1897e25fdb51Smrg possess(compress_have);
1898e25fdb51Smrg job->next = NULL;
1899e25fdb51Smrg *compress_tail = job;
1900e25fdb51Smrg compress_tail = &(job->next);
1901e25fdb51Smrg twist(compress_have, BY, +1);
1902e25fdb51Smrg } while (more);
1903cf2fd8adStls drop_space(next);
1904e25fdb51Smrg
1905e25fdb51Smrg /* wait for the write thread to complete (we leave the compress threads out
1906e25fdb51Smrg there and waiting in case there is another stream to compress) */
1907e25fdb51Smrg join(writeth);
1908e25fdb51Smrg writeth = NULL;
1909e25fdb51Smrg Trace(("-- write thread joined"));
1910e25fdb51Smrg }
1911e25fdb51Smrg
1912e25fdb51Smrg #endif
1913e25fdb51Smrg
/* repeated code in single_compress to compress available input and write it --
   relies on strm, out, out_size, and clen being in scope where it is expanded:
   deflate is run with the given flush value until it no longer fills the
   output buffer, each result is written to g.outd, and clen is advanced by the
   number of bytes written */
#define DEFLATE_WRITE(flush) \
    do { \
        do { \
            strm->avail_out = out_size; \
            strm->next_out = out; \
            (void)deflate(strm, flush); \
            writen(g.outd, out, out_size - strm->avail_out); \
            clen += out_size - strm->avail_out; \
        } while (strm->avail_out == 0); \
        assert(strm->avail_in == 0); \
    } while (0)
1926cf2fd8adStls
1927e25fdb51Smrg /* do a simple compression in a single thread from ind to outd -- if reset is
1928e25fdb51Smrg true, instead free the memory that was allocated and retained for input,
1929e25fdb51Smrg output, and deflate */
/* overview of the non-reset path:
   - put_header() is called first and its return value (head) is handed back to
     put_trailer() at the end together with ulen, clen and check
   - input is read with readn() up to g.block bytes at a time; in[] holds the
     data being compressed and next[] the data read ahead, and the two pointers
     are swapped each time in[] is used up (more == 0 means end of input)
   - for g.level <= 9 the data goes through zlib's deflate() by way of
     DEFLATE_WRITE(); for higher levels ZopfliDeflatePart() is used instead
   - when g.rsync is set, a rolling hash over the input picks the points at
     which a block is ended early
   - when g.setdict is false, the compression history is dropped whenever more
     than g.block bytes would otherwise accumulate in the current block
   - check is updated with CHECK() over every byte compressed, and ulen counts
     every byte returned by readn()
   the buffers and the z_stream are static, so they are kept between calls
   until a call with reset true releases them
   NOTE(review): clen is only advanced inside DEFLATE_WRITE(); the zopfli branch
   writes with writen() directly and never adds to clen, so put_trailer()
   receives clen == 0 on that path -- confirm whether put_trailer() relies on
   clen there */
single_compress(int reset)1930e25fdb51Smrg local void single_compress(int reset)
1931e25fdb51Smrg {
1932cf2fd8adStls size_t got; /* amount of data in in[] */
1933cf2fd8adStls size_t more; /* amount of data in next[] (0 if eof) */
1934cf2fd8adStls size_t start; /* start of data in next[] */
1935cf2fd8adStls size_t have; /* bytes in current block for -i */
1936cf2fd8adStls size_t hist; /* offset of permitted history */
1937cf2fd8adStls int fresh; /* if true, reset compression history */
1938cf2fd8adStls unsigned hash; /* hash for rsyncable */
1939cf2fd8adStls unsigned char *scan; /* pointer for hash computation */
1940cf2fd8adStls size_t left; /* bytes left to compress after hash hit */
1941e25fdb51Smrg unsigned long head; /* header length */
1942e25fdb51Smrg unsigned long ulen; /* total uncompressed size (overflow ok) */
1943e25fdb51Smrg unsigned long clen; /* total compressed size (overflow ok) */
1944e25fdb51Smrg unsigned long check; /* check value of uncompressed data */
1945e25fdb51Smrg static unsigned out_size; /* size of output buffer */
1946e25fdb51Smrg static unsigned char *in, *next, *out; /* reused i/o buffers */
1947e25fdb51Smrg static z_stream *strm = NULL; /* reused deflate structure */
1948e25fdb51Smrg
1949e25fdb51Smrg /* if requested, just release the allocations and return */
1950e25fdb51Smrg if (reset) {
    /* strm != NULL is the marker that the buffers were allocated */
1951e25fdb51Smrg if (strm != NULL) {
1952cf2fd8adStls (void)deflateEnd(strm);
1953cf2fd8adStls FREE(strm);
1954cf2fd8adStls FREE(out);
1955cf2fd8adStls FREE(next);
1956cf2fd8adStls FREE(in);
1957e25fdb51Smrg strm = NULL;
1958e25fdb51Smrg }
1959e25fdb51Smrg return;
1960e25fdb51Smrg }
1961e25fdb51Smrg
1962e25fdb51Smrg /* initialize the deflate structure if this is the first time */
1963e25fdb51Smrg if (strm == NULL) {
    /* the output buffer is capped at MAXP2 because out_size is an unsigned;
       in[] and next[] each get DICT extra bytes, used below to carry history
       in front of the data */
1964cf2fd8adStls out_size = g.block > MAXP2 ? MAXP2 : (unsigned)g.block;
1965cf2fd8adStls if ((in = MALLOC(g.block + DICT)) == NULL ||
1966cf2fd8adStls (next = MALLOC(g.block + DICT)) == NULL ||
1967cf2fd8adStls (out = MALLOC(out_size)) == NULL ||
1968cf2fd8adStls (strm = MALLOC(sizeof(z_stream))) == NULL)
1969e25fdb51Smrg bail("not enough memory", "");
1970cf2fd8adStls strm->zfree = ZFREE;
1971cf2fd8adStls strm->zalloc = ZALLOC;
1972cf2fd8adStls strm->opaque = OPAQUE;
    /* negative window bits (-15) ask zlib for raw deflate output; the level
       given here (6) is replaced by deflateParams() below */
1973cf2fd8adStls if (deflateInit2(strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) !=
1974cf2fd8adStls Z_OK)
1975e25fdb51Smrg bail("not enough memory", "");
1976e25fdb51Smrg }
1977e25fdb51Smrg
1978e25fdb51Smrg /* write header */
1979e25fdb51Smrg head = put_header();
1980e25fdb51Smrg
1981e25fdb51Smrg /* set compression level in case it changed */
    /* levels above 9 take the zopfli branch further down and do not use strm
       for compression, so strm is only reset and configured for level <= 9 */
1982cf2fd8adStls if (g.level <= 9) {
1983e25fdb51Smrg (void)deflateReset(strm);
1984cf2fd8adStls (void)deflateParams(strm, g.level, Z_DEFAULT_STRATEGY);
1985e25fdb51Smrg }
1986e25fdb51Smrg
1987cf2fd8adStls /* do raw deflate and calculate check value */
    /* got starts at zero and the first read goes into next[], so the first
       pass through the loop swaps that data into in[] */
1988cf2fd8adStls got = 0;
1989cf2fd8adStls more = readn(g.ind, next, g.block);
1990cf2fd8adStls ulen = (unsigned long)more;
1991cf2fd8adStls start = 0;
1992cf2fd8adStls hist = 0;
1993cf2fd8adStls clen = 0;
1994cf2fd8adStls have = 0;
1995cf2fd8adStls check = CHECK(0L, Z_NULL, 0);
1996cf2fd8adStls hash = RSYNCHIT;
1997e25fdb51Smrg do {
1998cf2fd8adStls /* get data to compress, see if there is any more input */
1999cf2fd8adStls if (got == 0) {
    /* swap in[] and next[] (scan is only a temporary here), so that what
       was read ahead becomes the current data */
2000cf2fd8adStls scan = in; in = next; next = scan;
2001cf2fd8adStls strm->next_in = in + start;
2002cf2fd8adStls got = more;
2003cf2fd8adStls if (g.level > 9) {
    /* zopfli path: copy up to DICT bytes from the end of the current
       data to the front of next[], so that the next read lands right
       after it and that history stays contiguous with the new data */
2004cf2fd8adStls left = start + more - hist;
2005cf2fd8adStls if (left > DICT)
2006cf2fd8adStls left = DICT;
2007cf2fd8adStls memcpy(next, in + ((start + more) - left), left);
2008cf2fd8adStls start = left;
2009cf2fd8adStls hist = 0;
2010cf2fd8adStls }
2011cf2fd8adStls else
2012cf2fd8adStls start = 0;
2013cf2fd8adStls more = readn(g.ind, next + start, g.block);
2014cf2fd8adStls ulen += (unsigned long)more;
2015cf2fd8adStls }
2016cf2fd8adStls
2017cf2fd8adStls /* if rsyncable, compute hash until a hit or the end of the block */
2018cf2fd8adStls left = 0;
2019cf2fd8adStls if (g.rsync && got) {
2020cf2fd8adStls scan = strm->next_in;
2021cf2fd8adStls left = got;
2022cf2fd8adStls do {
2023cf2fd8adStls if (left == 0) {
2024cf2fd8adStls /* went to the end -- if no more or no hit in size bytes,
2025cf2fd8adStls then proceed to do a flush or finish with got bytes */
2026cf2fd8adStls if (more == 0 || got == g.block)
2027cf2fd8adStls break;
2028cf2fd8adStls
2029cf2fd8adStls /* fill in[] with what's left there and as much as possible
2030cf2fd8adStls from next[] -- set up to continue hash hit search */
    /* left is zero on entry here; for zopfli it becomes the amount
       of history (at most DICT bytes) kept in front of the data */
2031cf2fd8adStls if (g.level > 9) {
2032cf2fd8adStls left = (strm->next_in - in) - hist;
2033cf2fd8adStls if (left > DICT)
2034cf2fd8adStls left = DICT;
2035cf2fd8adStls }
2036cf2fd8adStls memmove(in, strm->next_in - left, left + got);
2037cf2fd8adStls hist = 0;
2038cf2fd8adStls strm->next_in = in + left;
2039cf2fd8adStls scan = in + left + got;
    /* from here left is reused as the number of bytes taken from
       next[], limited so that got does not exceed g.block */
2040cf2fd8adStls left = more > g.block - got ? g.block - got : more;
2041cf2fd8adStls memcpy(scan, next + start, left);
2042cf2fd8adStls got += left;
2043cf2fd8adStls more -= left;
2044cf2fd8adStls start += left;
2045cf2fd8adStls
2046cf2fd8adStls /* if that emptied the next buffer, try to refill it */
2047cf2fd8adStls if (more == 0) {
2048cf2fd8adStls more = readn(g.ind, next, g.block);
2049cf2fd8adStls ulen += (unsigned long)more;
2050cf2fd8adStls start = 0;
2051cf2fd8adStls }
2052cf2fd8adStls }
    /* fold the next input byte into the rolling hash; the loop ends
       when the hash equals RSYNCHIT */
2053cf2fd8adStls left--;
2054cf2fd8adStls hash = ((hash << 1) ^ *scan++) & RSYNCMASK;
2055cf2fd8adStls } while (hash != RSYNCHIT);
    /* got is now the bytes up to and including the hit, left the bytes
       after it, which are compressed on the next pass of the outer loop */
2056cf2fd8adStls got -= left;
2057cf2fd8adStls }
2058cf2fd8adStls
2059cf2fd8adStls /* clear history for --independent option */
2060cf2fd8adStls fresh = 0;
2061cf2fd8adStls if (!g.setdict) {
2062cf2fd8adStls have += got;
2063cf2fd8adStls if (have > g.block) {
2064cf2fd8adStls fresh = 1;
2065cf2fd8adStls have = got;
2066cf2fd8adStls }
2067cf2fd8adStls }
2068cf2fd8adStls
2069cf2fd8adStls if (g.level <= 9) {
2070cf2fd8adStls /* clear history if requested */
2071cf2fd8adStls if (fresh)
2072cf2fd8adStls (void)deflateReset(strm);
2073cf2fd8adStls
2074cf2fd8adStls /* compress MAXP2-size chunks in case unsigned type is small */
2075cf2fd8adStls while (got > MAXP2) {
2076cf2fd8adStls strm->avail_in = MAXP2;
2077cf2fd8adStls check = CHECK(check, strm->next_in, strm->avail_in);
2078cf2fd8adStls DEFLATE_WRITE(Z_NO_FLUSH);
2079cf2fd8adStls got -= MAXP2;
2080cf2fd8adStls }
2081cf2fd8adStls
2082cf2fd8adStls /* compress the remainder, emit a block, finish if end of input */
2083cf2fd8adStls strm->avail_in = (unsigned)got;
    /* whatever followed the hash hit becomes the data for the next pass */
2084cf2fd8adStls got = left;
2085cf2fd8adStls check = CHECK(check, strm->next_in, strm->avail_in);
2086cf2fd8adStls if (more || got) {
2087cf2fd8adStls #if ZLIB_VERNUM >= 0x1260
2088cf2fd8adStls int bits;
2089cf2fd8adStls
    /* end the deflate block, then bring the output to a byte boundary
       based on the pending bit count reported by deflatePending(): an
       odd count is handled with a sync flush, an even non-multiple of
       eight by inserting 10 bits of value 2 with deflatePrime()
       (presumably an empty static block -- TODO confirm) until the
       count is a multiple of eight */
2090cf2fd8adStls DEFLATE_WRITE(Z_BLOCK);
2091cf2fd8adStls (void)deflatePending(strm, Z_NULL, &bits);
2092cf2fd8adStls if (bits & 1)
2093cf2fd8adStls DEFLATE_WRITE(Z_SYNC_FLUSH);
2094cf2fd8adStls else if (bits & 7) {
2095cf2fd8adStls do {
2096cf2fd8adStls bits = deflatePrime(strm, 10, 2);
2097cf2fd8adStls assert(bits == Z_OK);
2098cf2fd8adStls (void)deflatePending(strm, Z_NULL, &bits);
2099cf2fd8adStls } while (bits & 7);
2100cf2fd8adStls DEFLATE_WRITE(Z_NO_FLUSH);
2101cf2fd8adStls }
2102cf2fd8adStls #else
2103cf2fd8adStls DEFLATE_WRITE(Z_SYNC_FLUSH);
2104cf2fd8adStls #endif
2105cf2fd8adStls }
2106cf2fd8adStls else
2107cf2fd8adStls DEFLATE_WRITE(Z_FINISH);
2108cf2fd8adStls }
2109cf2fd8adStls else {
2110cf2fd8adStls /* compress got bytes using zopfli, bring to byte boundary */
    /* note: this local out hides the static out[] buffer declared at the
       top of the function for the rest of this branch */
2111cf2fd8adStls unsigned char bits, *out;
2112cf2fd8adStls size_t outsize, off;
2113cf2fd8adStls
2114cf2fd8adStls /* discard history if requested */
2115cf2fd8adStls off = strm->next_in - in;
2116cf2fd8adStls if (fresh)
2117cf2fd8adStls hist = off;
2118cf2fd8adStls
2119cf2fd8adStls out = NULL;
2120cf2fd8adStls outsize = 0;
2121cf2fd8adStls bits = 0;
    /* the third argument marks the last block: true only when there is
       neither read-ahead data (more) nor a remainder after a hash hit
       (left); the data handed over starts at in + hist so that the bytes
       before off serve as history */
2122cf2fd8adStls ZopfliDeflatePart(&g.zopts, 2, !(more || left),
2123cf2fd8adStls in + hist, off - hist, (off - hist) + got,
2124cf2fd8adStls &bits, &out, &outsize);
2125cf2fd8adStls bits &= 7;
    /* if this is not the last block and the output does not end on a byte
       boundary, pad it to one: for an odd bit count a stored-block marker
       (00 00 ff ff) is appended, preceded by a zero byte when bits is 7;
       otherwise bits are added into the last byte two at a time
       (presumably empty static blocks -- TODO confirm) */
2126cf2fd8adStls if ((more || left) && bits) {
2127cf2fd8adStls if (bits & 1) {
2128cf2fd8adStls writen(g.outd, out, outsize);
2129cf2fd8adStls if (bits == 7)
2130cf2fd8adStls writen(g.outd, (unsigned char *)"\0", 1);
2131cf2fd8adStls writen(g.outd, (unsigned char *)"\0\0\xff\xff", 4);
2132cf2fd8adStls }
2133cf2fd8adStls else {
2134cf2fd8adStls assert(outsize > 0);
2135cf2fd8adStls writen(g.outd, out, outsize - 1);
2136cf2fd8adStls do {
2137cf2fd8adStls out[outsize - 1] += 2 << bits;
2138cf2fd8adStls writen(g.outd, out + outsize - 1, 1);
2139cf2fd8adStls out[outsize - 1] = 0;
2140cf2fd8adStls bits += 2;
2141cf2fd8adStls } while (bits < 8);
2142cf2fd8adStls writen(g.outd, out + outsize - 1, 1);
2143cf2fd8adStls }
2144cf2fd8adStls }
2145cf2fd8adStls else
2146cf2fd8adStls writen(g.outd, out, outsize);
    /* the buffer was allocated by ZopfliDeflatePart() through &out */
2147cf2fd8adStls free(out);
    /* update the check value in MAXP2-size pieces and step next_in past
       the data just compressed (deflate() is not advancing it here) */
2148cf2fd8adStls while (got > MAXP2) {
2149cf2fd8adStls check = CHECK(check, strm->next_in, MAXP2);
2150cf2fd8adStls strm->next_in += MAXP2;
2151cf2fd8adStls got -= MAXP2;
2152cf2fd8adStls }
2153cf2fd8adStls check = CHECK(check, strm->next_in, (unsigned)got);
2154cf2fd8adStls strm->next_in += got;
2155cf2fd8adStls got = left;
2156cf2fd8adStls }
2157e25fdb51Smrg
2158e25fdb51Smrg /* do until no more input */
2159cf2fd8adStls } while (more || got);
2160e25fdb51Smrg
2161e25fdb51Smrg /* write trailer */
2162e25fdb51Smrg put_trailer(ulen, clen, check, head);
2163e25fdb51Smrg }
2164e25fdb51Smrg
2165e25fdb51Smrg /* --- decompression --- */
2166e25fdb51Smrg
2167e25fdb51Smrg #ifndef NOTHREAD
2168e25fdb51Smrg /* parallel read thread */
load_read(void * dummy)2169e25fdb51Smrg local void load_read(void *dummy)
2170e25fdb51Smrg {
2171e25fdb51Smrg size_t len;
2172e25fdb51Smrg
2173e25fdb51Smrg (void)dummy;
2174e25fdb51Smrg
2175e25fdb51Smrg Trace(("-- launched decompress read thread"));
2176e25fdb51Smrg do {
2177cf2fd8adStls possess(g.load_state);
2178cf2fd8adStls wait_for(g.load_state, TO_BE, 1);
2179cf2fd8adStls g.in_len = len = readn(g.ind, g.in_which ? g.in_buf : g.in_buf2, BUF);
2180e25fdb51Smrg Trace(("-- decompress read thread read %lu bytes", len));
2181cf2fd8adStls twist(g.load_state, TO, 0);
2182e25fdb51Smrg } while (len == BUF);
2183e25fdb51Smrg Trace(("-- exited decompress read thread"));
2184e25fdb51Smrg }
2185e25fdb51Smrg #endif
2186e25fdb51Smrg
2187cf2fd8adStls /* load() is called when the input has been consumed in order to provide more
2188cf2fd8adStls input data: load the input buffer with BUF or fewer bytes (fewer if at end
2189cf2fd8adStls of file) from the file g.ind, set g.in_next to point to the g.in_left bytes
2190cf2fd8adStls read, update g.in_tot, and return g.in_left -- g.in_eof is set to true when
2191cf2fd8adStls g.in_left has gone to zero and there is no more data left to read */
load(void)2192e25fdb51Smrg local size_t load(void)
2193e25fdb51Smrg {
2194e25fdb51Smrg /* if already detected end of file, do nothing */
2195cf2fd8adStls if (g.in_short) {
2196cf2fd8adStls g.in_eof = 1;
2197cf2fd8adStls g.in_left = 0;
2198e25fdb51Smrg return 0;
2199e25fdb51Smrg }
2200e25fdb51Smrg
2201e25fdb51Smrg #ifndef NOTHREAD
2202e25fdb51Smrg /* if first time in or procs == 1, read a buffer to have something to
2203e25fdb51Smrg return, otherwise wait for the previous read job to complete */
2204cf2fd8adStls if (g.procs > 1) {
2205e25fdb51Smrg /* if first time, fire up the read thread, ask for a read */
2206cf2fd8adStls if (g.in_which == -1) {
2207cf2fd8adStls g.in_which = 1;
2208cf2fd8adStls g.load_state = new_lock(1);
2209cf2fd8adStls g.load_thread = launch(load_read, NULL);
2210e25fdb51Smrg }
2211e25fdb51Smrg
2212e25fdb51Smrg /* wait for the previously requested read to complete */
2213cf2fd8adStls possess(g.load_state);
2214cf2fd8adStls wait_for(g.load_state, TO_BE, 0);
2215cf2fd8adStls release(g.load_state);
2216e25fdb51Smrg
2217e25fdb51Smrg /* set up input buffer with the data just read */
2218cf2fd8adStls g.in_next = g.in_which ? g.in_buf : g.in_buf2;
2219cf2fd8adStls g.in_left = g.in_len;
2220e25fdb51Smrg
2221e25fdb51Smrg /* if not at end of file, alert read thread to load next buffer,
2222cf2fd8adStls alternate between g.in_buf and g.in_buf2 */
2223cf2fd8adStls if (g.in_len == BUF) {
2224cf2fd8adStls g.in_which = 1 - g.in_which;
2225cf2fd8adStls possess(g.load_state);
2226cf2fd8adStls twist(g.load_state, TO, 1);
2227e25fdb51Smrg }
2228e25fdb51Smrg
2229e25fdb51Smrg /* at end of file -- join read thread (already exited), clean up */
2230e25fdb51Smrg else {
2231cf2fd8adStls join(g.load_thread);
2232cf2fd8adStls free_lock(g.load_state);
2233cf2fd8adStls g.in_which = -1;
2234e25fdb51Smrg }
2235e25fdb51Smrg }
2236e25fdb51Smrg else
2237e25fdb51Smrg #endif
2238e25fdb51Smrg {
2239cf2fd8adStls /* don't use threads -- simply read a buffer into g.in_buf */
2240cf2fd8adStls g.in_left = readn(g.ind, g.in_next = g.in_buf, BUF);
2241e25fdb51Smrg }
2242e25fdb51Smrg
2243e25fdb51Smrg /* note end of file */
2244cf2fd8adStls if (g.in_left < BUF) {
2245cf2fd8adStls g.in_short = 1;
2246e25fdb51Smrg
2247e25fdb51Smrg /* if we got bupkis, now is the time to mark eof */
2248cf2fd8adStls if (g.in_left == 0)
2249cf2fd8adStls g.in_eof = 1;
2250e25fdb51Smrg }
2251e25fdb51Smrg
2252e25fdb51Smrg /* update the total and return the available bytes */
2253cf2fd8adStls g.in_tot += g.in_left;
2254cf2fd8adStls return g.in_left;
2255e25fdb51Smrg }
2256e25fdb51Smrg
2257e25fdb51Smrg /* initialize for reading new input */
in_init(void)2258e25fdb51Smrg local void in_init(void)
2259e25fdb51Smrg {
2260cf2fd8adStls g.in_left = 0;
2261cf2fd8adStls g.in_eof = 0;
2262cf2fd8adStls g.in_short = 0;
2263cf2fd8adStls g.in_tot = 0;
2264e25fdb51Smrg #ifndef NOTHREAD
2265cf2fd8adStls g.in_which = -1;
2266e25fdb51Smrg #endif
2267e25fdb51Smrg }
2268e25fdb51Smrg
2269e25fdb51Smrg /* buffered reading macros for decompression and listing */
/* GET() evaluates to the next input byte, calling load() when g.in_left is
   zero; it evaluates to EOF if g.in_eof is already set or if load() returns
   zero.
   GET2() and GET4() assemble a value from two GET()s or two GET2()s, low part
   first, using variables named tmp2 and tmp4 that must be in scope where the
   macro is expanded (read_extra() and get_header() declare them as unsigned
   and unsigned long).  Neither checks for end of input, so the user tests
   g.in_eof afterwards.
   SKIP(dist) moves past dist bytes of input, calling load() as needed, and
   executes "return -1;" in the enclosing function if load() returns zero
   before dist bytes were skipped -- so it is only usable in functions that
   return an integer.
   NOTE(review): get_header() documents -1 as "immediate EOF" and -3 as
   "premature EOF within the header", yet an EOF met inside SKIP() makes it
   return -1 -- confirm whether that is intended. */
2270cf2fd8adStls #define GET() (g.in_eof || (g.in_left == 0 && load() == 0) ? EOF : \
2271cf2fd8adStls (g.in_left--, *g.in_next++))
2272cf2fd8adStls #define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8))
2273e25fdb51Smrg #define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16))
2274e25fdb51Smrg #define SKIP(dist) \
2275e25fdb51Smrg do { \
2276e25fdb51Smrg size_t togo = (dist); \
2277cf2fd8adStls while (togo > g.in_left) { \
2278cf2fd8adStls togo -= g.in_left; \
2279e25fdb51Smrg if (load() == 0) \
2280e25fdb51Smrg return -1; \
2281e25fdb51Smrg } \
2282cf2fd8adStls g.in_left -= togo; \
2283cf2fd8adStls g.in_next += togo; \
2284e25fdb51Smrg } while (0)
2285e25fdb51Smrg
/* assemble 2-byte and 4-byte integers from an unsigned char buffer p: the L
   forms take the least significant byte first, the M forms the most
   significant byte first -- p is evaluated more than once, so it must not
   have side effects */
#define PULL2L(p) ((unsigned)(p)[0] | ((unsigned)(p)[1] << 8))
#define PULL4L(p) ((unsigned long)PULL2L(p) | \
                   ((unsigned long)PULL2L((p) + 2) << 16))
#define PULL2M(p) (((unsigned)(p)[0] << 8) | (unsigned)(p)[1])
#define PULL4M(p) (((unsigned long)PULL2M(p) << 16) | \
                   (unsigned long)PULL2M((p) + 2))
2291cf2fd8adStls
/* convert a packed MS-DOS date and time (date in the upper 16 bits, time in
   the lower 16 bits) to a Unix time -- the fields are taken to be local time
   in the current timezone, with mktime() left to decide whether daylight
   saving time applies; a value of zero yields the current time */
local time_t dos2time(unsigned long dos)
{
    struct tm when;             /* broken-down time handed to mktime() */
    unsigned long ymd, hms;     /* date half and time half of dos */

    if (dos == 0)
        return time(NULL);

    ymd = dos >> 16;
    hms = dos & 0xffffUL;

    /* time: 5 bits of seconds/2, 6 bits of minutes, 5 bits of hours */
    when.tm_sec = (int)((hms & 0x1f) << 1);
    when.tm_min = (int)((hms >> 5) & 0x3f);
    when.tm_hour = (int)((hms >> 11) & 0x1f);

    /* date: 5 bits of day, 4 bits of month (1..12), 7 bits of years since
       1980 -- struct tm wants months from 0 and years since 1900 */
    when.tm_mday = (int)(ymd & 0x1f);
    when.tm_mon = (int)((ymd >> 5) & 0xf) - 1;
    when.tm_year = (int)((ymd >> 9) & 0x7f) + 80;

    when.tm_isdst = -1;         /* figure out if DST or not */
    return mktime(&when);
}
2309e25fdb51Smrg
/* convert an unsigned 32-bit integer to signed, even if long > 32 bits: the
   low 32 bits of val are read as a two's complement number, so 0xffffffff
   gives -1 and 0x80000000 gives -2147483648 (any bits above the low 32 are
   ignored) -- the arithmetic is arranged so that no intermediate value leaves
   the range of a 32-bit long, which avoids the out-of-range conversion and
   signed overflow that (long)(val & 0x80000000UL) would cause there */
local long tolong(unsigned long val)
{
    unsigned long low = val & 0x7fffffffUL;     /* value without sign bit */

    if (val & 0x80000000UL)
        /* negative: low - 2^31, computed as -(2^31 - 1 - low) - 1 */
        return -(long)(0x7fffffffUL - low) - 1;
    return (long)low;
}
2315e25fdb51Smrg
2316e25fdb51Smrg #define LOW32 0xffffffffUL
2317e25fdb51Smrg
2318e25fdb51Smrg /* process zip extra field to extract zip64 lengths and Unix mod time */
read_extra(unsigned len,int save)2319e25fdb51Smrg local int read_extra(unsigned len, int save)
2320e25fdb51Smrg {
2321e25fdb51Smrg unsigned id, size, tmp2;
2322e25fdb51Smrg unsigned long tmp4;
2323e25fdb51Smrg
2324e25fdb51Smrg /* process extra blocks */
2325e25fdb51Smrg while (len >= 4) {
2326e25fdb51Smrg id = GET2();
2327e25fdb51Smrg size = GET2();
2328cf2fd8adStls if (g.in_eof)
2329e25fdb51Smrg return -1;
2330e25fdb51Smrg len -= 4;
2331e25fdb51Smrg if (size > len)
2332e25fdb51Smrg break;
2333e25fdb51Smrg len -= size;
2334e25fdb51Smrg if (id == 0x0001) {
2335e25fdb51Smrg /* Zip64 Extended Information Extra Field */
2336cf2fd8adStls if (g.zip_ulen == LOW32 && size >= 8) {
2337cf2fd8adStls g.zip_ulen = GET4();
2338e25fdb51Smrg SKIP(4);
2339e25fdb51Smrg size -= 8;
2340e25fdb51Smrg }
2341cf2fd8adStls if (g.zip_clen == LOW32 && size >= 8) {
2342cf2fd8adStls g.zip_clen = GET4();
2343e25fdb51Smrg SKIP(4);
2344e25fdb51Smrg size -= 8;
2345e25fdb51Smrg }
2346e25fdb51Smrg }
2347e25fdb51Smrg if (save) {
2348e25fdb51Smrg if ((id == 0x000d || id == 0x5855) && size >= 8) {
2349e25fdb51Smrg /* PKWare Unix or Info-ZIP Type 1 Unix block */
2350e25fdb51Smrg SKIP(4);
2351cf2fd8adStls g.stamp = tolong(GET4());
2352e25fdb51Smrg size -= 8;
2353e25fdb51Smrg }
2354e25fdb51Smrg if (id == 0x5455 && size >= 5) {
2355e25fdb51Smrg /* Extended Timestamp block */
2356e25fdb51Smrg size--;
2357e25fdb51Smrg if (GET() & 1) {
2358cf2fd8adStls g.stamp = tolong(GET4());
2359e25fdb51Smrg size -= 4;
2360e25fdb51Smrg }
2361e25fdb51Smrg }
2362e25fdb51Smrg }
2363e25fdb51Smrg SKIP(size);
2364e25fdb51Smrg }
2365e25fdb51Smrg SKIP(len);
2366e25fdb51Smrg return 0;
2367e25fdb51Smrg }
2368e25fdb51Smrg
2369cf2fd8adStls /* read a gzip, zip, zlib, or lzw header from ind and return the method in the
2370cf2fd8adStls range 0..256 (256 implies a zip method greater than 255), or on error return
2371cf2fd8adStls negative: -1 is immediate EOF, -2 is not a recognized compressed format, -3
2372cf2fd8adStls is premature EOF within the header, -4 is unexpected header flag values, -5
2373cf2fd8adStls is the zip central directory; a method of 257 is lzw -- if the return value
2374cf2fd8adStls is not negative, then get_header() sets g.form to indicate gzip (0), zlib
2375cf2fd8adStls (1), or zip (2, or 3 if the entry is followed by a data descriptor) */
get_header(int save)2376e25fdb51Smrg local int get_header(int save)
2377e25fdb51Smrg {
2378e25fdb51Smrg unsigned magic; /* magic header */
2379e25fdb51Smrg int method; /* compression method */
2380e25fdb51Smrg int flags; /* header flags */
2381e25fdb51Smrg unsigned fname, extra; /* name and extra field lengths */
2382e25fdb51Smrg unsigned tmp2; /* for macro */
2383e25fdb51Smrg unsigned long tmp4; /* for macro */
2384e25fdb51Smrg
2385e25fdb51Smrg /* clear return information */
2386e25fdb51Smrg if (save) {
2387cf2fd8adStls g.stamp = 0;
2388cf2fd8adStls RELEASE(g.hname);
2389e25fdb51Smrg }
2390e25fdb51Smrg
2391e25fdb51Smrg /* see if it's a gzip, zlib, or lzw file */
2392cf2fd8adStls g.form = -1;
2393cf2fd8adStls g.magic1 = GET();
2394cf2fd8adStls if (g.in_eof)
2395e25fdb51Smrg return -1;
2396cf2fd8adStls magic = g.magic1 << 8;
2397e25fdb51Smrg magic += GET();
2398cf2fd8adStls if (g.in_eof)
2399e25fdb51Smrg return -2;
2400e25fdb51Smrg if (magic % 31 == 0) { /* it's zlib */
2401cf2fd8adStls g.form = 1;
2402e25fdb51Smrg return (int)((magic >> 8) & 0xf);
2403e25fdb51Smrg }
2404e25fdb51Smrg if (magic == 0x1f9d) /* it's lzw */
2405cf2fd8adStls return 257;
2406e25fdb51Smrg if (magic == 0x504b) { /* it's zip */
2407cf2fd8adStls magic = GET2(); /* the rest of the signature */
2408cf2fd8adStls if (g.in_eof)
2409e25fdb51Smrg return -3;
2410cf2fd8adStls if (magic == 0x0201 || magic == 0x0806)
2411cf2fd8adStls return -5; /* central header or archive extra */
2412cf2fd8adStls if (magic != 0x0403)
2413cf2fd8adStls return -4; /* not a local header */
2414e25fdb51Smrg SKIP(2);
2415e25fdb51Smrg flags = GET2();
2416cf2fd8adStls if (g.in_eof)
2417e25fdb51Smrg return -3;
2418e25fdb51Smrg if (flags & 0xfff0)
2419e25fdb51Smrg return -4;
2420cf2fd8adStls method = GET(); /* return low byte of method or 256 */
2421cf2fd8adStls if (GET() != 0 || flags & 1)
2422cf2fd8adStls method = 256; /* unknown or encrypted */
2423cf2fd8adStls if (g.in_eof)
2424e25fdb51Smrg return -3;
2425e25fdb51Smrg if (save)
2426cf2fd8adStls g.stamp = dos2time(GET4());
2427e25fdb51Smrg else
2428e25fdb51Smrg SKIP(4);
2429cf2fd8adStls g.zip_crc = GET4();
2430cf2fd8adStls g.zip_clen = GET4();
2431cf2fd8adStls g.zip_ulen = GET4();
2432e25fdb51Smrg fname = GET2();
2433e25fdb51Smrg extra = GET2();
2434e25fdb51Smrg if (save) {
2435cf2fd8adStls char *next = g.hname = MALLOC(fname + 1);
2436cf2fd8adStls if (g.hname == NULL)
2437e25fdb51Smrg bail("not enough memory", "");
2438cf2fd8adStls while (fname > g.in_left) {
2439cf2fd8adStls memcpy(next, g.in_next, g.in_left);
2440cf2fd8adStls fname -= g.in_left;
2441cf2fd8adStls next += g.in_left;
2442e25fdb51Smrg if (load() == 0)
2443e25fdb51Smrg return -3;
2444e25fdb51Smrg }
2445cf2fd8adStls memcpy(next, g.in_next, fname);
2446cf2fd8adStls g.in_left -= fname;
2447cf2fd8adStls g.in_next += fname;
2448e25fdb51Smrg next += fname;
2449e25fdb51Smrg *next = 0;
2450e25fdb51Smrg }
2451e25fdb51Smrg else
2452e25fdb51Smrg SKIP(fname);
2453e25fdb51Smrg read_extra(extra, save);
2454cf2fd8adStls g.form = 2 + ((flags & 8) >> 3);
2455cf2fd8adStls return g.in_eof ? -3 : method;
2456e25fdb51Smrg }
2457cf2fd8adStls if (magic != 0x1f8b) { /* not gzip */
2458cf2fd8adStls g.in_left++; /* unget second magic byte */
2459cf2fd8adStls g.in_next--;
2460e25fdb51Smrg return -2;
2461cf2fd8adStls }
2462e25fdb51Smrg
2463e25fdb51Smrg /* it's gzip -- get method and flags */
2464e25fdb51Smrg method = GET();
2465e25fdb51Smrg flags = GET();
2466cf2fd8adStls if (g.in_eof)
2467e25fdb51Smrg return -1;
2468e25fdb51Smrg if (flags & 0xe0)
2469e25fdb51Smrg return -4;
2470e25fdb51Smrg
2471e25fdb51Smrg /* get time stamp */
2472e25fdb51Smrg if (save)
2473cf2fd8adStls g.stamp = tolong(GET4());
2474e25fdb51Smrg else
2475e25fdb51Smrg SKIP(4);
2476e25fdb51Smrg
2477e25fdb51Smrg /* skip extra field and OS */
2478e25fdb51Smrg SKIP(2);
2479e25fdb51Smrg
2480e25fdb51Smrg /* skip extra field, if present */
2481e25fdb51Smrg if (flags & 4) {
2482e25fdb51Smrg extra = GET2();
2483cf2fd8adStls if (g.in_eof)
2484e25fdb51Smrg return -3;
2485e25fdb51Smrg SKIP(extra);
2486e25fdb51Smrg }
2487e25fdb51Smrg
2488e25fdb51Smrg /* read file name, if present, into allocated memory */
2489e25fdb51Smrg if ((flags & 8) && save) {
2490e25fdb51Smrg unsigned char *end;
2491e25fdb51Smrg size_t copy, have, size = 128;
2492cf2fd8adStls g.hname = MALLOC(size);
2493cf2fd8adStls if (g.hname == NULL)
2494e25fdb51Smrg bail("not enough memory", "");
2495e25fdb51Smrg have = 0;
2496e25fdb51Smrg do {
2497cf2fd8adStls if (g.in_left == 0 && load() == 0)
2498e25fdb51Smrg return -3;
2499cf2fd8adStls end = memchr(g.in_next, 0, g.in_left);
2500cf2fd8adStls copy = end == NULL ? g.in_left : (size_t)(end - g.in_next) + 1;
2501e25fdb51Smrg if (have + copy > size) {
2502e25fdb51Smrg while (have + copy > (size <<= 1))
2503e25fdb51Smrg ;
2504cf2fd8adStls g.hname = REALLOC(g.hname, size);
2505cf2fd8adStls if (g.hname == NULL)
2506e25fdb51Smrg bail("not enough memory", "");
2507e25fdb51Smrg }
2508cf2fd8adStls memcpy(g.hname + have, g.in_next, copy);
2509e25fdb51Smrg have += copy;
2510cf2fd8adStls g.in_left -= copy;
2511cf2fd8adStls g.in_next += copy;
2512e25fdb51Smrg } while (end == NULL);
2513e25fdb51Smrg }
2514e25fdb51Smrg else if (flags & 8)
2515e25fdb51Smrg while (GET() != 0)
2516cf2fd8adStls if (g.in_eof)
2517e25fdb51Smrg return -3;
2518e25fdb51Smrg
2519e25fdb51Smrg /* skip comment */
2520e25fdb51Smrg if (flags & 16)
2521e25fdb51Smrg while (GET() != 0)
2522cf2fd8adStls if (g.in_eof)
2523e25fdb51Smrg return -3;
2524e25fdb51Smrg
2525e25fdb51Smrg /* skip header crc */
2526e25fdb51Smrg if (flags & 2)
2527e25fdb51Smrg SKIP(2);
2528e25fdb51Smrg
2529cf2fd8adStls /* return gzip compression method */
2530cf2fd8adStls g.form = 0;
2531e25fdb51Smrg return method;
2532e25fdb51Smrg }
2533e25fdb51Smrg
2534e25fdb51Smrg /* --- list contents of compressed input (gzip, zlib, or lzw) */
2535e25fdb51Smrg
/* look for a standard compressed-file suffix at the end of nm and return its
   length, or 0 if there is none -- a suffix only counts when the name is
   longer than the suffix itself, and longer suffixes are tried first */
local size_t compressed_suffix(char *nm)
{
    static const char *const known[] = {
        ".zip", ".ZIP", ".tgz",
        ".gz", "-gz", ".zz", "-zz",
        ".z", "-z", "_z", ".Z"
    };
    size_t total = strlen(nm);
    size_t i, slen;

    for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
        slen = strlen(known[i]);
        if (total > slen && strcmp(nm + (total - slen), known[i]) == 0)
            return slen;
    }
    return 0;
}
2564e25fdb51Smrg
2565e25fdb51Smrg /* listing file name lengths for -l and -lv */
2566e25fdb51Smrg #define NAMEMAX1 48 /* name display limit at verbosity 1 */
2567e25fdb51Smrg #define NAMEMAX2 16 /* name display limit at verbosity 2 */
2568e25fdb51Smrg
2569e25fdb51Smrg /* print gzip or lzw file information */
show_info(int method,unsigned long check,off_t len,int cont)2570e25fdb51Smrg local void show_info(int method, unsigned long check, off_t len, int cont)
2571e25fdb51Smrg {
2572e25fdb51Smrg size_t max; /* maximum name length for current verbosity */
2573e25fdb51Smrg size_t n; /* name length without suffix */
2574e25fdb51Smrg time_t now; /* for getting current year */
2575e25fdb51Smrg char mod[26]; /* modification time in text */
2576cf2fd8adStls char tag[NAMEMAX1+1]; /* header or file name, possibly truncated */
2577e25fdb51Smrg
2578e25fdb51Smrg /* create abbreviated name from header file name or actual file name */
2579cf2fd8adStls max = g.verbosity > 1 ? NAMEMAX2 : NAMEMAX1;
2580cf2fd8adStls memset(tag, 0, max + 1);
2581e25fdb51Smrg if (cont)
2582cf2fd8adStls strncpy(tag, "<...>", max + 1);
2583cf2fd8adStls else if (g.hname == NULL) {
2584cf2fd8adStls n = strlen(g.inf) - compressed_suffix(g.inf);
2585cf2fd8adStls strncpy(tag, g.inf, n > max + 1 ? max + 1 : n);
2586cf2fd8adStls if (strcmp(g.inf + n, ".tgz") == 0 && n < max + 1)
2587cf2fd8adStls strncpy(tag + n, ".tar", max + 1 - n);
2588e25fdb51Smrg }
2589e25fdb51Smrg else
2590cf2fd8adStls strncpy(tag, g.hname, max + 1);
2591cf2fd8adStls if (tag[max])
2592cf2fd8adStls strcpy(tag + max - 3, "...");
2593e25fdb51Smrg
2594e25fdb51Smrg /* convert time stamp to text */
2595cf2fd8adStls if (g.stamp) {
2596cf2fd8adStls strcpy(mod, ctime(&g.stamp));
2597e25fdb51Smrg now = time(NULL);
2598e25fdb51Smrg if (strcmp(mod + 20, ctime(&now) + 20) != 0)
2599e25fdb51Smrg strcpy(mod + 11, mod + 19);
2600e25fdb51Smrg }
2601e25fdb51Smrg else
2602e25fdb51Smrg strcpy(mod + 4, "------ -----");
2603e25fdb51Smrg mod[16] = 0;
2604e25fdb51Smrg
2605e25fdb51Smrg /* if first time, print header */
2606cf2fd8adStls if (g.first) {
2607cf2fd8adStls if (g.verbosity > 1)
2608e25fdb51Smrg fputs("method check timestamp ", stdout);
2609cf2fd8adStls if (g.verbosity > 0)
2610e25fdb51Smrg puts("compressed original reduced name");
2611cf2fd8adStls g.first = 0;
2612e25fdb51Smrg }
2613e25fdb51Smrg
2614e25fdb51Smrg /* print information */
2615cf2fd8adStls if (g.verbosity > 1) {
2616cf2fd8adStls if (g.form == 3 && !g.decode)
2617e25fdb51Smrg printf("zip%3d -------- %s ", method, mod + 4);
2618cf2fd8adStls else if (g.form > 1)
2619e25fdb51Smrg printf("zip%3d %08lx %s ", method, check, mod + 4);
2620cf2fd8adStls else if (g.form == 1)
2621e25fdb51Smrg printf("zlib%2d %08lx %s ", method, check, mod + 4);
2622cf2fd8adStls else if (method == 257)
2623e25fdb51Smrg printf("lzw -------- %s ", mod + 4);
2624e25fdb51Smrg else
2625e25fdb51Smrg printf("gzip%2d %08lx %s ", method, check, mod + 4);
2626e25fdb51Smrg }
2627cf2fd8adStls if (g.verbosity > 0) {
2628cf2fd8adStls if ((g.form == 3 && !g.decode) ||
2629cf2fd8adStls (method == 8 && g.in_tot > (len + (len >> 10) + 12)) ||
2630cf2fd8adStls (method == 257 && g.in_tot > len + (len >> 1) + 3))
2631cf2fd8adStls #if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3
2632cf2fd8adStls printf("%10jd %10jd? unk %s\n",
2633cf2fd8adStls (intmax_t)g.in_tot, (intmax_t)len, tag);
2634e25fdb51Smrg else
2635cf2fd8adStls printf("%10jd %10jd %6.1f%% %s\n",
2636cf2fd8adStls (intmax_t)g.in_tot, (intmax_t)len,
2637cf2fd8adStls len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len,
2638cf2fd8adStls tag);
2639cf2fd8adStls #else
2640cf2fd8adStls printf(sizeof(off_t) == sizeof(long) ?
2641cf2fd8adStls "%10ld %10ld? unk %s\n" : "%10lld %10lld? unk %s\n",
2642cf2fd8adStls g.in_tot, len, tag);
2643cf2fd8adStls else
2644cf2fd8adStls printf(sizeof(off_t) == sizeof(long) ?
2645cf2fd8adStls "%10ld %10ld %6.1f%% %s\n" : "%10lld %10lld %6.1f%% %s\n",
2646cf2fd8adStls g.in_tot, len,
2647cf2fd8adStls len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len,
2648cf2fd8adStls tag);
2649cf2fd8adStls #endif
2650e25fdb51Smrg }
2651e25fdb51Smrg }
2652e25fdb51Smrg
2653e25fdb51Smrg /* list content information about the gzip file at ind (only works if the gzip
2654e25fdb51Smrg file contains a single gzip stream with no junk at the end, and only works
2655e25fdb51Smrg well if the uncompressed length is less than 4 GB) */
list_info(void)2656e25fdb51Smrg local void list_info(void)
2657e25fdb51Smrg {
2658e25fdb51Smrg int method; /* get_header() return value */
2659e25fdb51Smrg size_t n; /* available trailer bytes */
2660e25fdb51Smrg off_t at; /* used to calculate compressed length */
2661e25fdb51Smrg unsigned char tail[8]; /* trailer containing check and length */
2662e25fdb51Smrg unsigned long check, len; /* check value and length from trailer */
2663e25fdb51Smrg
2664e25fdb51Smrg /* initialize input buffer */
2665e25fdb51Smrg in_init();
2666e25fdb51Smrg
2667e25fdb51Smrg /* read header information and position input after header */
2668e25fdb51Smrg method = get_header(1);
2669e25fdb51Smrg if (method < 0) {
2670cf2fd8adStls RELEASE(g.hname);
2671cf2fd8adStls if (method != -1 && g.verbosity > 1)
2672cf2fd8adStls complain("%s not a compressed file -- skipping", g.inf);
2673e25fdb51Smrg return;
2674e25fdb51Smrg }
2675e25fdb51Smrg
2676e25fdb51Smrg /* list zip file */
2677cf2fd8adStls if (g.form > 1) {
2678cf2fd8adStls g.in_tot = g.zip_clen;
2679cf2fd8adStls show_info(method, g.zip_crc, g.zip_ulen, 0);
2680e25fdb51Smrg return;
2681e25fdb51Smrg }
2682e25fdb51Smrg
2683e25fdb51Smrg /* list zlib file */
2684cf2fd8adStls if (g.form == 1) {
2685cf2fd8adStls at = lseek(g.ind, 0, SEEK_END);
2686e25fdb51Smrg if (at == -1) {
2687e25fdb51Smrg check = 0;
2688e25fdb51Smrg do {
2689cf2fd8adStls len = g.in_left < 4 ? g.in_left : 4;
2690cf2fd8adStls g.in_next += g.in_left - len;
2691e25fdb51Smrg while (len--)
2692cf2fd8adStls check = (check << 8) + *g.in_next++;
2693e25fdb51Smrg } while (load() != 0);
2694e25fdb51Smrg check &= LOW32;
2695e25fdb51Smrg }
2696e25fdb51Smrg else {
2697cf2fd8adStls g.in_tot = at;
2698cf2fd8adStls lseek(g.ind, -4, SEEK_END);
2699cf2fd8adStls readn(g.ind, tail, 4);
2700cf2fd8adStls check = PULL4M(tail);
2701e25fdb51Smrg }
2702cf2fd8adStls g.in_tot -= 6;
2703e25fdb51Smrg show_info(method, check, 0, 0);
2704e25fdb51Smrg return;
2705e25fdb51Smrg }
2706e25fdb51Smrg
2707e25fdb51Smrg /* list lzw file */
2708cf2fd8adStls if (method == 257) {
2709cf2fd8adStls at = lseek(g.ind, 0, SEEK_END);
2710e25fdb51Smrg if (at == -1)
2711e25fdb51Smrg while (load() != 0)
2712e25fdb51Smrg ;
2713e25fdb51Smrg else
2714cf2fd8adStls g.in_tot = at;
2715cf2fd8adStls g.in_tot -= 3;
2716e25fdb51Smrg show_info(method, 0, 0, 0);
2717e25fdb51Smrg return;
2718e25fdb51Smrg }
2719e25fdb51Smrg
2720e25fdb51Smrg /* skip to end to get trailer (8 bytes), compute compressed length */
2721cf2fd8adStls if (g.in_short) { /* whole thing already read */
2722cf2fd8adStls if (g.in_left < 8) {
2723cf2fd8adStls complain("%s not a valid gzip file -- skipping", g.inf);
2724e25fdb51Smrg return;
2725e25fdb51Smrg }
2726cf2fd8adStls g.in_tot = g.in_left - 8; /* compressed size */
2727cf2fd8adStls memcpy(tail, g.in_next + (g.in_left - 8), 8);
2728e25fdb51Smrg }
2729cf2fd8adStls else if ((at = lseek(g.ind, -8, SEEK_END)) != -1) {
2730cf2fd8adStls g.in_tot = at - g.in_tot + g.in_left; /* compressed size */
2731cf2fd8adStls readn(g.ind, tail, 8); /* get trailer */
2732e25fdb51Smrg }
2733e25fdb51Smrg else { /* can't seek */
2734cf2fd8adStls at = g.in_tot - g.in_left; /* save header size */
2735e25fdb51Smrg do {
2736cf2fd8adStls n = g.in_left < 8 ? g.in_left : 8;
2737cf2fd8adStls memcpy(tail, g.in_next + (g.in_left - n), n);
2738e25fdb51Smrg load();
2739cf2fd8adStls } while (g.in_left == BUF); /* read until end */
2740cf2fd8adStls if (g.in_left < 8) {
2741cf2fd8adStls if (n + g.in_left < 8) {
2742cf2fd8adStls complain("%s not a valid gzip file -- skipping", g.inf);
2743e25fdb51Smrg return;
2744e25fdb51Smrg }
2745cf2fd8adStls if (g.in_left) {
2746cf2fd8adStls if (n + g.in_left > 8)
2747cf2fd8adStls memcpy(tail, tail + n - (8 - g.in_left), 8 - g.in_left);
2748cf2fd8adStls memcpy(tail + 8 - g.in_left, g.in_next, g.in_left);
2749e25fdb51Smrg }
2750e25fdb51Smrg }
2751e25fdb51Smrg else
2752cf2fd8adStls memcpy(tail, g.in_next + (g.in_left - 8), 8);
2753cf2fd8adStls g.in_tot -= at + 8;
2754e25fdb51Smrg }
2755cf2fd8adStls if (g.in_tot < 2) {
2756cf2fd8adStls complain("%s not a valid gzip file -- skipping", g.inf);
2757e25fdb51Smrg return;
2758e25fdb51Smrg }
2759e25fdb51Smrg
2760e25fdb51Smrg /* convert trailer to check and uncompressed length (modulo 2^32) */
2761cf2fd8adStls check = PULL4L(tail);
2762cf2fd8adStls len = PULL4L(tail + 4);
2763e25fdb51Smrg
2764e25fdb51Smrg /* list information about contents */
2765e25fdb51Smrg show_info(method, check, len, 0);
2766cf2fd8adStls RELEASE(g.hname);
2767cf2fd8adStls }
2768cf2fd8adStls
2769cf2fd8adStls /* --- copy input to output (when acting like cat) --- */
2770cf2fd8adStls
cat(void)2771cf2fd8adStls local void cat(void)
2772cf2fd8adStls {
2773cf2fd8adStls /* write first magic byte (if we're here, there's at least one byte) */
2774cf2fd8adStls writen(g.outd, &g.magic1, 1);
2775cf2fd8adStls g.out_tot = 1;
2776cf2fd8adStls
2777cf2fd8adStls /* copy the remainder of the input to the output (if there were any more
2778cf2fd8adStls bytes of input, then g.in_left is non-zero and g.in_next is pointing to
2779cf2fd8adStls the second magic byte) */
2780cf2fd8adStls while (g.in_left) {
2781cf2fd8adStls writen(g.outd, g.in_next, g.in_left);
2782cf2fd8adStls g.out_tot += g.in_left;
2783cf2fd8adStls g.in_left = 0;
2784cf2fd8adStls load();
2785cf2fd8adStls }
2786e25fdb51Smrg }
2787e25fdb51Smrg
2788e25fdb51Smrg /* --- decompress deflate input --- */
2789e25fdb51Smrg
2790e25fdb51Smrg /* call-back input function for inflateBack() */
inb(void * desc,unsigned char ** buf)2791e25fdb51Smrg local unsigned inb(void *desc, unsigned char **buf)
2792e25fdb51Smrg {
2793e25fdb51Smrg (void)desc;
2794e25fdb51Smrg load();
2795cf2fd8adStls *buf = g.in_next;
2796cf2fd8adStls return g.in_left;
2797e25fdb51Smrg }
2798e25fdb51Smrg
2799e25fdb51Smrg /* output buffers and window for infchk() and unlzw() */
2800e25fdb51Smrg #define OUTSIZE 32768U /* must be at least 32K for inflateBack() window */
2801e25fdb51Smrg local unsigned char out_buf[OUTSIZE];
2802e25fdb51Smrg
2803e25fdb51Smrg #ifndef NOTHREAD
2804e25fdb51Smrg /* output data for parallel write and check */
2805e25fdb51Smrg local unsigned char out_copy[OUTSIZE];
2806e25fdb51Smrg local size_t out_len;
2807e25fdb51Smrg
2808e25fdb51Smrg /* outb threads states */
2809e25fdb51Smrg local lock *outb_write_more = NULL;
2810e25fdb51Smrg local lock *outb_check_more;
2811e25fdb51Smrg
2812e25fdb51Smrg /* output write thread */
outb_write(void * dummy)2813e25fdb51Smrg local void outb_write(void *dummy)
2814e25fdb51Smrg {
2815e25fdb51Smrg size_t len;
2816e25fdb51Smrg
2817e25fdb51Smrg (void)dummy;
2818e25fdb51Smrg
2819e25fdb51Smrg Trace(("-- launched decompress write thread"));
2820e25fdb51Smrg do {
2821e25fdb51Smrg possess(outb_write_more);
2822e25fdb51Smrg wait_for(outb_write_more, TO_BE, 1);
2823e25fdb51Smrg len = out_len;
2824cf2fd8adStls if (len && g.decode == 1)
2825cf2fd8adStls writen(g.outd, out_copy, len);
2826e25fdb51Smrg Trace(("-- decompress wrote %lu bytes", len));
2827e25fdb51Smrg twist(outb_write_more, TO, 0);
2828e25fdb51Smrg } while (len);
2829e25fdb51Smrg Trace(("-- exited decompress write thread"));
2830e25fdb51Smrg }
2831e25fdb51Smrg
2832e25fdb51Smrg /* output check thread */
outb_check(void * dummy)2833e25fdb51Smrg local void outb_check(void *dummy)
2834e25fdb51Smrg {
2835e25fdb51Smrg size_t len;
2836e25fdb51Smrg
2837e25fdb51Smrg (void)dummy;
2838e25fdb51Smrg
2839e25fdb51Smrg Trace(("-- launched decompress check thread"));
2840e25fdb51Smrg do {
2841e25fdb51Smrg possess(outb_check_more);
2842e25fdb51Smrg wait_for(outb_check_more, TO_BE, 1);
2843e25fdb51Smrg len = out_len;
2844cf2fd8adStls g.out_check = CHECK(g.out_check, out_copy, len);
2845e25fdb51Smrg Trace(("-- decompress checked %lu bytes", len));
2846e25fdb51Smrg twist(outb_check_more, TO, 0);
2847e25fdb51Smrg } while (len);
2848e25fdb51Smrg Trace(("-- exited decompress check thread"));
2849e25fdb51Smrg }
2850e25fdb51Smrg #endif
2851e25fdb51Smrg
2852e25fdb51Smrg /* call-back output function for inflateBack() -- wait for the last write and
2853e25fdb51Smrg check calculation to complete, copy the write buffer, and then alert the
2854e25fdb51Smrg write and check threads and return for more decompression while that's
2855e25fdb51Smrg going on (or just write and check if no threads or if proc == 1) */
outb(void * desc,unsigned char * buf,unsigned len)2856e25fdb51Smrg local int outb(void *desc, unsigned char *buf, unsigned len)
2857e25fdb51Smrg {
2858e25fdb51Smrg #ifndef NOTHREAD
2859e25fdb51Smrg static thread *wr, *ch;
2860e25fdb51Smrg
2861cf2fd8adStls if (g.procs > 1) {
2862e25fdb51Smrg /* if first time, initialize state and launch threads */
2863e25fdb51Smrg if (outb_write_more == NULL) {
2864e25fdb51Smrg outb_write_more = new_lock(0);
2865e25fdb51Smrg outb_check_more = new_lock(0);
2866e25fdb51Smrg wr = launch(outb_write, NULL);
2867e25fdb51Smrg ch = launch(outb_check, NULL);
2868e25fdb51Smrg }
2869e25fdb51Smrg
2870e25fdb51Smrg /* wait for previous write and check threads to complete */
2871e25fdb51Smrg possess(outb_check_more);
2872e25fdb51Smrg wait_for(outb_check_more, TO_BE, 0);
2873e25fdb51Smrg possess(outb_write_more);
2874e25fdb51Smrg wait_for(outb_write_more, TO_BE, 0);
2875e25fdb51Smrg
2876e25fdb51Smrg /* copy the output and alert the worker bees */
2877e25fdb51Smrg out_len = len;
2878cf2fd8adStls g.out_tot += len;
2879e25fdb51Smrg memcpy(out_copy, buf, len);
2880e25fdb51Smrg twist(outb_write_more, TO, 1);
2881e25fdb51Smrg twist(outb_check_more, TO, 1);
2882e25fdb51Smrg
2883e25fdb51Smrg /* if requested with len == 0, clean up -- terminate and join write and
2884e25fdb51Smrg check threads, free lock */
2885e25fdb51Smrg if (len == 0) {
2886e25fdb51Smrg join(ch);
2887e25fdb51Smrg join(wr);
2888e25fdb51Smrg free_lock(outb_check_more);
2889e25fdb51Smrg free_lock(outb_write_more);
2890e25fdb51Smrg outb_write_more = NULL;
2891e25fdb51Smrg }
2892e25fdb51Smrg
2893e25fdb51Smrg /* return for more decompression while last buffer is being written
2894e25fdb51Smrg and having its check value calculated -- we wait for those to finish
2895e25fdb51Smrg the next time this function is called */
2896e25fdb51Smrg return 0;
2897e25fdb51Smrg }
2898e25fdb51Smrg #endif
2899e25fdb51Smrg
2900cf2fd8adStls (void)desc;
2901cf2fd8adStls
2902e25fdb51Smrg /* if just one process or no threads, then do it without threads */
2903e25fdb51Smrg if (len) {
2904cf2fd8adStls if (g.decode == 1)
2905cf2fd8adStls writen(g.outd, buf, len);
2906cf2fd8adStls g.out_check = CHECK(g.out_check, buf, len);
2907cf2fd8adStls g.out_tot += len;
2908e25fdb51Smrg }
2909e25fdb51Smrg return 0;
2910e25fdb51Smrg }
2911e25fdb51Smrg
2912e25fdb51Smrg /* inflate for decompression or testing -- decompress from ind to outd unless
2913e25fdb51Smrg decode != 1, in which case just test ind, and then also list if list != 0;
2914e25fdb51Smrg look for and decode multiple, concatenated gzip and/or zlib streams;
2915e25fdb51Smrg read and check the gzip, zlib, or zip trailer */
infchk(void)2916e25fdb51Smrg local void infchk(void)
2917e25fdb51Smrg {
2918cf2fd8adStls int ret, cont, was;
2919e25fdb51Smrg unsigned long check, len;
2920e25fdb51Smrg z_stream strm;
2921e25fdb51Smrg unsigned tmp2;
2922e25fdb51Smrg unsigned long tmp4;
2923e25fdb51Smrg off_t clen;
2924e25fdb51Smrg
2925e25fdb51Smrg cont = 0;
2926e25fdb51Smrg do {
2927e25fdb51Smrg /* header already read -- set up for decompression */
2928cf2fd8adStls g.in_tot = g.in_left; /* track compressed data length */
2929cf2fd8adStls g.out_tot = 0;
2930cf2fd8adStls g.out_check = CHECK(0L, Z_NULL, 0);
2931cf2fd8adStls strm.zalloc = ZALLOC;
2932cf2fd8adStls strm.zfree = ZFREE;
2933cf2fd8adStls strm.opaque = OPAQUE;
2934e25fdb51Smrg ret = inflateBackInit(&strm, 15, out_buf);
2935e25fdb51Smrg if (ret != Z_OK)
2936e25fdb51Smrg bail("not enough memory", "");
2937e25fdb51Smrg
2938e25fdb51Smrg /* decompress, compute lengths and check value */
2939cf2fd8adStls strm.avail_in = g.in_left;
2940cf2fd8adStls strm.next_in = g.in_next;
2941e25fdb51Smrg ret = inflateBack(&strm, inb, NULL, outb, NULL);
2942e25fdb51Smrg if (ret != Z_STREAM_END)
2943cf2fd8adStls bail("corrupted input -- invalid deflate data: ", g.inf);
2944cf2fd8adStls g.in_left = strm.avail_in;
2945cf2fd8adStls g.in_next = strm.next_in;
2946e25fdb51Smrg inflateBackEnd(&strm);
2947e25fdb51Smrg outb(NULL, NULL, 0); /* finish off final write and check */
2948e25fdb51Smrg
2949e25fdb51Smrg /* compute compressed data length */
2950cf2fd8adStls clen = g.in_tot - g.in_left;
2951e25fdb51Smrg
2952e25fdb51Smrg /* read and check trailer */
2953cf2fd8adStls if (g.form > 1) { /* zip local trailer (if any) */
2954cf2fd8adStls if (g.form == 3) { /* data descriptor follows */
2955e25fdb51Smrg /* read original version of data descriptor */
2956cf2fd8adStls g.zip_crc = GET4();
2957cf2fd8adStls g.zip_clen = GET4();
2958cf2fd8adStls g.zip_ulen = GET4();
2959cf2fd8adStls if (g.in_eof)
2960cf2fd8adStls bail("corrupted zip entry -- missing trailer: ", g.inf);
2961e25fdb51Smrg
2962e25fdb51Smrg /* if crc doesn't match, try info-zip variant with sig */
2963cf2fd8adStls if (g.zip_crc != g.out_check) {
2964cf2fd8adStls if (g.zip_crc != 0x08074b50UL || g.zip_clen != g.out_check)
2965cf2fd8adStls bail("corrupted zip entry -- crc32 mismatch: ", g.inf);
2966cf2fd8adStls g.zip_crc = g.zip_clen;
2967cf2fd8adStls g.zip_clen = g.zip_ulen;
2968cf2fd8adStls g.zip_ulen = GET4();
2969cf2fd8adStls }
2970cf2fd8adStls
2971cf2fd8adStls /* handle incredibly rare cases where crc equals signature */
2972cf2fd8adStls else if (g.zip_crc == 0x08074b50UL &&
2973cf2fd8adStls g.zip_clen == g.zip_crc &&
2974cf2fd8adStls ((clen & LOW32) != g.zip_crc ||
2975cf2fd8adStls g.zip_ulen == g.zip_crc)) {
2976cf2fd8adStls g.zip_crc = g.zip_clen;
2977cf2fd8adStls g.zip_clen = g.zip_ulen;
2978cf2fd8adStls g.zip_ulen = GET4();
2979e25fdb51Smrg }
2980e25fdb51Smrg
2981e25fdb51Smrg /* if second length doesn't match, try 64-bit lengths */
2982cf2fd8adStls if (g.zip_ulen != (g.out_tot & LOW32)) {
2983cf2fd8adStls g.zip_ulen = GET4();
2984e25fdb51Smrg (void)GET4();
2985e25fdb51Smrg }
2986cf2fd8adStls if (g.in_eof)
2987cf2fd8adStls bail("corrupted zip entry -- missing trailer: ", g.inf);
2988e25fdb51Smrg }
2989cf2fd8adStls if (g.zip_clen != (clen & LOW32) ||
2990cf2fd8adStls g.zip_ulen != (g.out_tot & LOW32))
2991cf2fd8adStls bail("corrupted zip entry -- length mismatch: ", g.inf);
2992cf2fd8adStls check = g.zip_crc;
2993e25fdb51Smrg }
2994cf2fd8adStls else if (g.form == 1) { /* zlib (big-endian) trailer */
2995e25fdb51Smrg check = (unsigned long)(GET()) << 24;
2996e25fdb51Smrg check += (unsigned long)(GET()) << 16;
2997cf2fd8adStls check += (unsigned)(GET()) << 8;
2998e25fdb51Smrg check += GET();
2999cf2fd8adStls if (g.in_eof)
3000cf2fd8adStls bail("corrupted zlib stream -- missing trailer: ", g.inf);
3001cf2fd8adStls if (check != g.out_check)
3002cf2fd8adStls bail("corrupted zlib stream -- adler32 mismatch: ", g.inf);
3003e25fdb51Smrg }
3004e25fdb51Smrg else { /* gzip trailer */
3005e25fdb51Smrg check = GET4();
3006e25fdb51Smrg len = GET4();
3007cf2fd8adStls if (g.in_eof)
3008cf2fd8adStls bail("corrupted gzip stream -- missing trailer: ", g.inf);
3009cf2fd8adStls if (check != g.out_check)
3010cf2fd8adStls bail("corrupted gzip stream -- crc32 mismatch: ", g.inf);
3011cf2fd8adStls if (len != (g.out_tot & LOW32))
3012cf2fd8adStls bail("corrupted gzip stream -- length mismatch: ", g.inf);
3013e25fdb51Smrg }
3014e25fdb51Smrg
3015e25fdb51Smrg /* show file information if requested */
3016cf2fd8adStls if (g.list) {
3017cf2fd8adStls g.in_tot = clen;
3018cf2fd8adStls show_info(8, check, g.out_tot, cont);
3019e25fdb51Smrg cont = 1;
3020e25fdb51Smrg }
3021e25fdb51Smrg
3022cf2fd8adStls /* if a gzip entry follows a gzip entry, decompress it (don't replace
3023cf2fd8adStls saved header information from first entry) */
3024cf2fd8adStls was = g.form;
3025cf2fd8adStls } while (was == 0 && (ret = get_header(0)) == 8 && g.form == 0);
3026cf2fd8adStls
3027cf2fd8adStls /* gzip -cdf copies junk after gzip stream directly to output */
3028cf2fd8adStls if (was == 0 && ret == -2 && g.force && g.pipeout && g.decode != 2 &&
3029cf2fd8adStls !g.list)
3030cf2fd8adStls cat();
3031cf2fd8adStls else if (was > 1 && get_header(0) != -5)
3032cf2fd8adStls complain("entries after the first in %s were ignored", g.inf);
3033cf2fd8adStls else if ((was == 0 && ret != -1) || (was == 1 && GET() != EOF))
3034cf2fd8adStls complain("%s OK, has trailing junk which was ignored", g.inf);
3035e25fdb51Smrg }
3036e25fdb51Smrg
3037e25fdb51Smrg /* --- decompress Unix compress (LZW) input --- */
3038e25fdb51Smrg
/* working tables for unlzw() -- entries 0..255 of prefix[] and suffix[] are
   never used; the index could have been offset instead, but wasting the
   memory is faster */
unsigned short prefix[65536];       /* index to LZW prefix string */
unsigned char suffix[65536];        /* one-character LZW suffix */
unsigned char match[65280 + 2];     /* buffer for reversed match */
3045e25fdb51Smrg
/* Discard the leftover bits and skip the input bytes remaining in the
   current chunk (this is a vestigial aspect of the compressed data format
   derived from an implementation that made use of a special VAX machine
   instruction!).  Uses the caller's locals left, rem, and chunk.  If the
   input runs out before the whole chunk has been skipped, the macro stops
   early. */
#define FLUSHCODE() \
    do { \
        rem = 0; \
        left = 0; \
        if (chunk > g.in_left) { \
            chunk -= g.in_left; \
            if (load() == 0) \
                break; \
            if (chunk > g.in_left) { \
                g.in_left = 0; \
                chunk = 0; \
                break; \
            } \
        } \
        g.in_next += chunk; \
        g.in_left -= chunk; \
        chunk = 0; \
    } while (0)
3066e25fdb51Smrg
3067e25fdb51Smrg /* Decompress a compress (LZW) file from ind to outd. The compress magic
3068e25fdb51Smrg header (two bytes) has already been read and verified. */
unlzw(void)3069e25fdb51Smrg local void unlzw(void)
3070e25fdb51Smrg {
3071e25fdb51Smrg int got; /* byte just read by GET() */
3072e25fdb51Smrg unsigned chunk; /* bytes left in current chunk */
3073e25fdb51Smrg int left; /* bits left in rem */
3074e25fdb51Smrg unsigned rem; /* unused bits from input */
3075e25fdb51Smrg int bits; /* current bits per code */
3076e25fdb51Smrg unsigned code; /* code, table traversal index */
3077e25fdb51Smrg unsigned mask; /* mask for current bits codes */
3078e25fdb51Smrg int max; /* maximum bits per code for this stream */
3079e25fdb51Smrg int flags; /* compress flags, then block compress flag */
3080e25fdb51Smrg unsigned end; /* last valid entry in prefix/suffix tables */
3081e25fdb51Smrg unsigned temp; /* current code */
3082e25fdb51Smrg unsigned prev; /* previous code */
3083e25fdb51Smrg unsigned final; /* last character written for previous code */
3084e25fdb51Smrg unsigned stack; /* next position for reversed string */
3085e25fdb51Smrg unsigned outcnt; /* bytes in output buffer */
3086e25fdb51Smrg unsigned char *p;
3087e25fdb51Smrg
3088e25fdb51Smrg /* process remainder of compress header -- a flags byte */
3089cf2fd8adStls g.out_tot = 0;
3090e25fdb51Smrg flags = GET();
3091cf2fd8adStls if (g.in_eof)
3092cf2fd8adStls bail("missing lzw data: ", g.inf);
3093e25fdb51Smrg if (flags & 0x60)
3094cf2fd8adStls bail("unknown lzw flags set: ", g.inf);
3095e25fdb51Smrg max = flags & 0x1f;
3096e25fdb51Smrg if (max < 9 || max > 16)
3097cf2fd8adStls bail("lzw bits out of range: ", g.inf);
3098e25fdb51Smrg if (max == 9) /* 9 doesn't really mean 9 */
3099e25fdb51Smrg max = 10;
3100e25fdb51Smrg flags &= 0x80; /* true if block compress */
3101e25fdb51Smrg
3102e25fdb51Smrg /* clear table */
3103e25fdb51Smrg bits = 9;
3104e25fdb51Smrg mask = 0x1ff;
3105e25fdb51Smrg end = flags ? 256 : 255;
3106e25fdb51Smrg
3107e25fdb51Smrg /* set up: get first 9-bit code, which is the first decompressed byte, but
3108e25fdb51Smrg don't create a table entry until the next code */
3109e25fdb51Smrg got = GET();
3110cf2fd8adStls if (g.in_eof) /* no compressed data is ok */
3111e25fdb51Smrg return;
3112e25fdb51Smrg final = prev = (unsigned)got; /* low 8 bits of code */
3113e25fdb51Smrg got = GET();
3114cf2fd8adStls if (g.in_eof || (got & 1) != 0) /* missing a bit or code >= 256 */
3115cf2fd8adStls bail("invalid lzw code: ", g.inf);
3116e25fdb51Smrg rem = (unsigned)got >> 1; /* remaining 7 bits */
3117e25fdb51Smrg left = 7;
3118e25fdb51Smrg chunk = bits - 2; /* 7 bytes left in this chunk */
3119e25fdb51Smrg out_buf[0] = (unsigned char)final; /* write first decompressed byte */
3120e25fdb51Smrg outcnt = 1;
3121e25fdb51Smrg
3122e25fdb51Smrg /* decode codes */
3123e25fdb51Smrg stack = 0;
3124e25fdb51Smrg for (;;) {
3125e25fdb51Smrg /* if the table will be full after this, increment the code size */
3126e25fdb51Smrg if (end >= mask && bits < max) {
3127e25fdb51Smrg FLUSHCODE();
3128e25fdb51Smrg bits++;
3129e25fdb51Smrg mask <<= 1;
3130e25fdb51Smrg mask++;
3131e25fdb51Smrg }
3132e25fdb51Smrg
3133e25fdb51Smrg /* get a code of length bits */
3134e25fdb51Smrg if (chunk == 0) /* decrement chunk modulo bits */
3135e25fdb51Smrg chunk = bits;
3136e25fdb51Smrg code = rem; /* low bits of code */
3137e25fdb51Smrg got = GET();
3138cf2fd8adStls if (g.in_eof) { /* EOF is end of compressed data */
3139e25fdb51Smrg /* write remaining buffered output */
3140cf2fd8adStls g.out_tot += outcnt;
3141cf2fd8adStls if (outcnt && g.decode == 1)
3142cf2fd8adStls writen(g.outd, out_buf, outcnt);
3143e25fdb51Smrg return;
3144e25fdb51Smrg }
3145e25fdb51Smrg code += (unsigned)got << left; /* middle (or high) bits of code */
3146e25fdb51Smrg left += 8;
3147e25fdb51Smrg chunk--;
3148e25fdb51Smrg if (bits > left) { /* need more bits */
3149e25fdb51Smrg got = GET();
3150cf2fd8adStls if (g.in_eof) /* can't end in middle of code */
3151cf2fd8adStls bail("invalid lzw code: ", g.inf);
3152e25fdb51Smrg code += (unsigned)got << left; /* high bits of code */
3153e25fdb51Smrg left += 8;
3154e25fdb51Smrg chunk--;
3155e25fdb51Smrg }
3156e25fdb51Smrg code &= mask; /* mask to current code length */
3157e25fdb51Smrg left -= bits; /* number of unused bits */
3158e25fdb51Smrg rem = (unsigned)got >> (8 - left); /* unused bits from last byte */
3159e25fdb51Smrg
3160e25fdb51Smrg /* process clear code (256) */
3161e25fdb51Smrg if (code == 256 && flags) {
3162e25fdb51Smrg FLUSHCODE();
3163e25fdb51Smrg bits = 9; /* initialize bits and mask */
3164e25fdb51Smrg mask = 0x1ff;
3165e25fdb51Smrg end = 255; /* empty table */
3166e25fdb51Smrg continue; /* get next code */
3167e25fdb51Smrg }
3168e25fdb51Smrg
3169e25fdb51Smrg /* special code to reuse last match */
3170e25fdb51Smrg temp = code; /* save the current code */
3171e25fdb51Smrg if (code > end) {
3172e25fdb51Smrg /* Be picky on the allowed code here, and make sure that the code
3173e25fdb51Smrg we drop through (prev) will be a valid index so that random
3174e25fdb51Smrg input does not cause an exception. The code != end + 1 check is
3175e25fdb51Smrg empirically derived, and not checked in the original uncompress
3176e25fdb51Smrg code. If this ever causes a problem, that check could be safely
3177e25fdb51Smrg removed. Leaving this check in greatly improves pigz's ability
3178e25fdb51Smrg to detect random or corrupted input after a compress header.
3179e25fdb51Smrg In any case, the prev > end check must be retained. */
3180e25fdb51Smrg if (code != end + 1 || prev > end)
3181cf2fd8adStls bail("invalid lzw code: ", g.inf);
3182e25fdb51Smrg match[stack++] = (unsigned char)final;
3183e25fdb51Smrg code = prev;
3184e25fdb51Smrg }
3185e25fdb51Smrg
3186e25fdb51Smrg /* walk through linked list to generate output in reverse order */
3187e25fdb51Smrg p = match + stack;
3188e25fdb51Smrg while (code >= 256) {
3189e25fdb51Smrg *p++ = suffix[code];
3190e25fdb51Smrg code = prefix[code];
3191e25fdb51Smrg }
3192e25fdb51Smrg stack = p - match;
3193e25fdb51Smrg match[stack++] = (unsigned char)code;
3194e25fdb51Smrg final = code;
3195e25fdb51Smrg
3196e25fdb51Smrg /* link new table entry */
3197e25fdb51Smrg if (end < mask) {
3198e25fdb51Smrg end++;
3199e25fdb51Smrg prefix[end] = (unsigned short)prev;
3200e25fdb51Smrg suffix[end] = (unsigned char)final;
3201e25fdb51Smrg }
3202e25fdb51Smrg
3203e25fdb51Smrg /* set previous code for next iteration */
3204e25fdb51Smrg prev = temp;
3205e25fdb51Smrg
3206e25fdb51Smrg /* write output in forward order */
3207e25fdb51Smrg while (stack > OUTSIZE - outcnt) {
3208e25fdb51Smrg while (outcnt < OUTSIZE)
3209e25fdb51Smrg out_buf[outcnt++] = match[--stack];
3210cf2fd8adStls g.out_tot += outcnt;
3211cf2fd8adStls if (g.decode == 1)
3212cf2fd8adStls writen(g.outd, out_buf, outcnt);
3213e25fdb51Smrg outcnt = 0;
3214e25fdb51Smrg }
3215e25fdb51Smrg p = match + stack;
3216e25fdb51Smrg do {
3217e25fdb51Smrg out_buf[outcnt++] = *--p;
3218e25fdb51Smrg } while (p > match);
3219e25fdb51Smrg stack = 0;
3220e25fdb51Smrg
3221e25fdb51Smrg /* loop for next code with final and prev as the last match, rem and
3222e25fdb51Smrg left provide the first 0..7 bits of the next code, end is the last
3223e25fdb51Smrg valid table entry */
3224e25fdb51Smrg }
3225e25fdb51Smrg }
3226e25fdb51Smrg
3227e25fdb51Smrg /* --- file processing --- */
3228e25fdb51Smrg
/* Extract the file name from a path: return a pointer, into path itself, to
   the character after the last '/', or path unchanged if it contains no
   '/'.  A path ending in '/' yields an empty string. */
local char *justname(char *path)
{
    char *slash;

    /* strrchr() avoids stepping a pointer back to before the start of path,
       which is undefined behavior */
    slash = strrchr(path, '/');
    return slash == NULL ? path : slash + 1;
}
3240e25fdb51Smrg
3241e25fdb51Smrg /* Copy file attributes, from -> to, as best we can. This is best effort, so
3242e25fdb51Smrg no errors are reported. The mode bits, including suid, sgid, and the sticky
3243e25fdb51Smrg bit are copied (if allowed), the owner's user id and group id are copied
3244e25fdb51Smrg (again if allowed), and the access and modify times are copied. */
copymeta(char * from,char * to)3245e25fdb51Smrg local void copymeta(char *from, char *to)
3246e25fdb51Smrg {
3247e25fdb51Smrg struct stat st;
3248e25fdb51Smrg struct timeval times[2];
3249e25fdb51Smrg
3250e25fdb51Smrg /* get all of from's Unix meta data, return if not a regular file */
3251e25fdb51Smrg if (stat(from, &st) != 0 || (st.st_mode & S_IFMT) != S_IFREG)
3252e25fdb51Smrg return;
3253e25fdb51Smrg
3254e25fdb51Smrg /* set to's mode bits, ignore errors */
3255cf2fd8adStls (void)chmod(to, st.st_mode & 07777);
3256e25fdb51Smrg
3257e25fdb51Smrg /* copy owner's user and group, ignore errors */
3258cf2fd8adStls (void)chown(to, st.st_uid, st.st_gid);
3259e25fdb51Smrg
3260e25fdb51Smrg /* copy access and modify times, ignore errors */
3261e25fdb51Smrg times[0].tv_sec = st.st_atime;
3262e25fdb51Smrg times[0].tv_usec = 0;
3263e25fdb51Smrg times[1].tv_sec = st.st_mtime;
3264e25fdb51Smrg times[1].tv_usec = 0;
3265cf2fd8adStls (void)utimes(to, times);
3266e25fdb51Smrg }
3267e25fdb51Smrg
3268e25fdb51Smrg /* set the access and modify times of fd to t */
touch(char * path,time_t t)3269e25fdb51Smrg local void touch(char *path, time_t t)
3270e25fdb51Smrg {
3271e25fdb51Smrg struct timeval times[2];
3272e25fdb51Smrg
3273e25fdb51Smrg times[0].tv_sec = t;
3274e25fdb51Smrg times[0].tv_usec = 0;
3275e25fdb51Smrg times[1].tv_sec = t;
3276e25fdb51Smrg times[1].tv_usec = 0;
3277cf2fd8adStls (void)utimes(path, times);
3278e25fdb51Smrg }
3279e25fdb51Smrg
3280e25fdb51Smrg /* process provided input file, or stdin if path is NULL -- process() can
3281e25fdb51Smrg call itself for recursive directory processing */
process(char * path)3282e25fdb51Smrg local void process(char *path)
3283e25fdb51Smrg {
3284e25fdb51Smrg int method = -1; /* get_header() return value */
3285e25fdb51Smrg size_t len; /* length of base name (minus suffix) */
3286e25fdb51Smrg struct stat st; /* to get file type and mod time */
3287cf2fd8adStls /* all compressed suffixes for decoding search, in length order */
3288cf2fd8adStls static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz",
3289cf2fd8adStls ".zip", ".ZIP", ".tgz", NULL};
3290e25fdb51Smrg
3291e25fdb51Smrg /* open input file with name in, descriptor ind -- set name and mtime */
3292e25fdb51Smrg if (path == NULL) {
3293cf2fd8adStls strcpy(g.inf, "<stdin>");
3294cf2fd8adStls g.ind = 0;
3295cf2fd8adStls g.name = NULL;
3296cf2fd8adStls g.mtime = g.headis & 2 ?
3297cf2fd8adStls (fstat(g.ind, &st) ? time(NULL) : st.st_mtime) : 0;
3298e25fdb51Smrg len = 0;
3299e25fdb51Smrg }
3300e25fdb51Smrg else {
3301e25fdb51Smrg /* set input file name (already set if recursed here) */
3302cf2fd8adStls if (path != g.inf) {
3303cf2fd8adStls strncpy(g.inf, path, sizeof(g.inf));
3304cf2fd8adStls if (g.inf[sizeof(g.inf) - 1])
3305e25fdb51Smrg bail("name too long: ", path);
3306e25fdb51Smrg }
3307cf2fd8adStls len = strlen(g.inf);
3308cf2fd8adStls
3309cf2fd8adStls /* try to stat input file -- if not there and decoding, look for that
3310cf2fd8adStls name with compressed suffixes */
3311cf2fd8adStls if (lstat(g.inf, &st)) {
3312cf2fd8adStls if (errno == ENOENT && (g.list || g.decode)) {
3313cf2fd8adStls char **try = sufs;
3314cf2fd8adStls do {
3315cf2fd8adStls if (*try == NULL || len + strlen(*try) >= sizeof(g.inf))
3316cf2fd8adStls break;
3317cf2fd8adStls strcpy(g.inf + len, *try++);
3318cf2fd8adStls errno = 0;
3319cf2fd8adStls } while (lstat(g.inf, &st) && errno == ENOENT);
3320cf2fd8adStls }
3321cf2fd8adStls #ifdef EOVERFLOW
3322cf2fd8adStls if (errno == EOVERFLOW || errno == EFBIG)
3323cf2fd8adStls bail(g.inf,
3324cf2fd8adStls " too large -- not compiled with large file support");
3325cf2fd8adStls #endif
3326cf2fd8adStls if (errno) {
3327cf2fd8adStls g.inf[len] = 0;
3328cf2fd8adStls complain("%s does not exist -- skipping", g.inf);
3329cf2fd8adStls return;
3330cf2fd8adStls }
3331cf2fd8adStls len = strlen(g.inf);
3332cf2fd8adStls }
3333e25fdb51Smrg
3334e25fdb51Smrg /* only process regular files, but allow symbolic links if -f,
3335e25fdb51Smrg recurse into directory if -r */
3336e25fdb51Smrg if ((st.st_mode & S_IFMT) != S_IFREG &&
3337e25fdb51Smrg (st.st_mode & S_IFMT) != S_IFLNK &&
3338e25fdb51Smrg (st.st_mode & S_IFMT) != S_IFDIR) {
3339cf2fd8adStls complain("%s is a special file or device -- skipping", g.inf);
3340e25fdb51Smrg return;
3341e25fdb51Smrg }
3342cf2fd8adStls if ((st.st_mode & S_IFMT) == S_IFLNK && !g.force && !g.pipeout) {
3343cf2fd8adStls complain("%s is a symbolic link -- skipping", g.inf);
3344e25fdb51Smrg return;
3345e25fdb51Smrg }
3346cf2fd8adStls if ((st.st_mode & S_IFMT) == S_IFDIR && !g.recurse) {
3347cf2fd8adStls complain("%s is a directory -- skipping", g.inf);
3348e25fdb51Smrg return;
3349e25fdb51Smrg }
3350e25fdb51Smrg
3351e25fdb51Smrg /* recurse into directory (assumes Unix) */
3352e25fdb51Smrg if ((st.st_mode & S_IFMT) == S_IFDIR) {
3353e25fdb51Smrg char *roll, *item, *cut, *base, *bigger;
3354e25fdb51Smrg size_t len, hold;
3355e25fdb51Smrg DIR *here;
3356e25fdb51Smrg struct dirent *next;
3357e25fdb51Smrg
3358e25fdb51Smrg /* accumulate list of entries (need to do this, since readdir()
3359e25fdb51Smrg behavior not defined if directory modified between calls) */
3360cf2fd8adStls here = opendir(g.inf);
3361e25fdb51Smrg if (here == NULL)
3362e25fdb51Smrg return;
3363e25fdb51Smrg hold = 512;
3364cf2fd8adStls roll = MALLOC(hold);
3365e25fdb51Smrg if (roll == NULL)
3366e25fdb51Smrg bail("not enough memory", "");
3367e25fdb51Smrg *roll = 0;
3368e25fdb51Smrg item = roll;
3369e25fdb51Smrg while ((next = readdir(here)) != NULL) {
3370e25fdb51Smrg if (next->d_name[0] == 0 ||
3371e25fdb51Smrg (next->d_name[0] == '.' && (next->d_name[1] == 0 ||
3372e25fdb51Smrg (next->d_name[1] == '.' && next->d_name[2] == 0))))
3373e25fdb51Smrg continue;
3374e25fdb51Smrg len = strlen(next->d_name) + 1;
3375e25fdb51Smrg if (item + len + 1 > roll + hold) {
3376e25fdb51Smrg do { /* make roll bigger */
3377e25fdb51Smrg hold <<= 1;
3378e25fdb51Smrg } while (item + len + 1 > roll + hold);
3379cf2fd8adStls bigger = REALLOC(roll, hold);
3380e25fdb51Smrg if (bigger == NULL) {
3381cf2fd8adStls FREE(roll);
3382e25fdb51Smrg bail("not enough memory", "");
3383e25fdb51Smrg }
3384e25fdb51Smrg item = bigger + (item - roll);
3385e25fdb51Smrg roll = bigger;
3386e25fdb51Smrg }
3387e25fdb51Smrg strcpy(item, next->d_name);
3388e25fdb51Smrg item += len;
3389e25fdb51Smrg *item = 0;
3390e25fdb51Smrg }
3391e25fdb51Smrg closedir(here);
3392e25fdb51Smrg
3393e25fdb51Smrg /* run process() for each entry in the directory */
3394cf2fd8adStls cut = base = g.inf + strlen(g.inf);
3395cf2fd8adStls if (base > g.inf && base[-1] != (unsigned char)'/') {
3396cf2fd8adStls if ((size_t)(base - g.inf) >= sizeof(g.inf))
3397cf2fd8adStls bail("path too long", g.inf);
3398e25fdb51Smrg *base++ = '/';
3399e25fdb51Smrg }
3400e25fdb51Smrg item = roll;
3401e25fdb51Smrg while (*item) {
3402cf2fd8adStls strncpy(base, item, sizeof(g.inf) - (base - g.inf));
3403cf2fd8adStls if (g.inf[sizeof(g.inf) - 1]) {
3404cf2fd8adStls strcpy(g.inf + (sizeof(g.inf) - 4), "...");
3405cf2fd8adStls bail("path too long: ", g.inf);
3406e25fdb51Smrg }
3407cf2fd8adStls process(g.inf);
3408e25fdb51Smrg item += strlen(item) + 1;
3409e25fdb51Smrg }
3410e25fdb51Smrg *cut = 0;
3411e25fdb51Smrg
3412e25fdb51Smrg /* release list of entries */
3413cf2fd8adStls FREE(roll);
3414e25fdb51Smrg return;
3415e25fdb51Smrg }
3416e25fdb51Smrg
3417e25fdb51Smrg /* don't compress .gz (or provided suffix) files, unless -f */
3418cf2fd8adStls if (!(g.force || g.list || g.decode) && len >= strlen(g.sufx) &&
3419cf2fd8adStls strcmp(g.inf + len - strlen(g.sufx), g.sufx) == 0) {
3420cf2fd8adStls complain("%s ends with %s -- skipping", g.inf, g.sufx);
3421e25fdb51Smrg return;
3422e25fdb51Smrg }
3423e25fdb51Smrg
3424cf2fd8adStls /* create output file only if input file has compressed suffix */
3425cf2fd8adStls if (g.decode == 1 && !g.pipeout && !g.list) {
3426cf2fd8adStls int suf = compressed_suffix(g.inf);
3427e25fdb51Smrg if (suf == 0) {
3428cf2fd8adStls complain("%s does not have compressed suffix -- skipping",
3429cf2fd8adStls g.inf);
3430e25fdb51Smrg return;
3431e25fdb51Smrg }
3432e25fdb51Smrg len -= suf;
3433e25fdb51Smrg }
3434e25fdb51Smrg
3435e25fdb51Smrg /* open input file */
3436cf2fd8adStls g.ind = open(g.inf, O_RDONLY, 0);
3437cf2fd8adStls if (g.ind < 0)
3438cf2fd8adStls bail("read error on ", g.inf);
3439e25fdb51Smrg
3440e25fdb51Smrg /* prepare gzip header information for compression */
3441cf2fd8adStls g.name = g.headis & 1 ? justname(g.inf) : NULL;
3442cf2fd8adStls g.mtime = g.headis & 2 ? st.st_mtime : 0;
3443e25fdb51Smrg }
3444cf2fd8adStls SET_BINARY_MODE(g.ind);
3445e25fdb51Smrg
3446e25fdb51Smrg /* if decoding or testing, try to read gzip header */
3447cf2fd8adStls g.hname = NULL;
3448cf2fd8adStls if (g.decode) {
3449e25fdb51Smrg in_init();
3450e25fdb51Smrg method = get_header(1);
3451cf2fd8adStls if (method != 8 && method != 257 &&
3452cf2fd8adStls /* gzip -cdf acts like cat on uncompressed input */
3453cf2fd8adStls !(method == -2 && g.force && g.pipeout && g.decode != 2 &&
3454cf2fd8adStls !g.list)) {
3455cf2fd8adStls RELEASE(g.hname);
3456cf2fd8adStls if (g.ind != 0)
3457cf2fd8adStls close(g.ind);
3458cf2fd8adStls if (method != -1)
3459cf2fd8adStls complain(method < 0 ? "%s is not compressed -- skipping" :
3460cf2fd8adStls "%s has unknown compression method -- skipping",
3461cf2fd8adStls g.inf);
3462e25fdb51Smrg return;
3463e25fdb51Smrg }
3464e25fdb51Smrg
3465e25fdb51Smrg /* if requested, test input file (possibly a special list) */
3466cf2fd8adStls if (g.decode == 2) {
3467e25fdb51Smrg if (method == 8)
3468e25fdb51Smrg infchk();
3469e25fdb51Smrg else {
3470e25fdb51Smrg unlzw();
3471cf2fd8adStls if (g.list) {
3472cf2fd8adStls g.in_tot -= 3;
3473cf2fd8adStls show_info(method, 0, g.out_tot, 0);
3474e25fdb51Smrg }
3475e25fdb51Smrg }
3476cf2fd8adStls RELEASE(g.hname);
3477cf2fd8adStls if (g.ind != 0)
3478cf2fd8adStls close(g.ind);
3479e25fdb51Smrg return;
3480e25fdb51Smrg }
3481e25fdb51Smrg }
3482e25fdb51Smrg
3483e25fdb51Smrg /* if requested, just list information about input file */
3484cf2fd8adStls if (g.list) {
3485e25fdb51Smrg list_info();
3486cf2fd8adStls RELEASE(g.hname);
3487cf2fd8adStls if (g.ind != 0)
3488cf2fd8adStls close(g.ind);
3489e25fdb51Smrg return;
3490e25fdb51Smrg }
3491e25fdb51Smrg
3492e25fdb51Smrg /* create output file out, descriptor outd */
3493cf2fd8adStls if (path == NULL || g.pipeout) {
3494e25fdb51Smrg /* write to stdout */
3495cf2fd8adStls g.outf = MALLOC(strlen("<stdout>") + 1);
3496cf2fd8adStls if (g.outf == NULL)
3497e25fdb51Smrg bail("not enough memory", "");
3498cf2fd8adStls strcpy(g.outf, "<stdout>");
3499cf2fd8adStls g.outd = 1;
3500cf2fd8adStls if (!g.decode && !g.force && isatty(g.outd))
3501e25fdb51Smrg bail("trying to write compressed data to a terminal",
3502e25fdb51Smrg " (use -f to force)");
3503e25fdb51Smrg }
3504e25fdb51Smrg else {
3505*58d602d3Smrg char *to = g.inf, *sufx = "";
3506*58d602d3Smrg size_t pre = 0;
3507e25fdb51Smrg
3508*58d602d3Smrg /* select parts of the output file name */
3509*58d602d3Smrg if (g.decode) {
3510*58d602d3Smrg /* for -dN or -dNT, use the path from the input file and the name
3511*58d602d3Smrg from the header, stripping any path in the header name */
3512*58d602d3Smrg if ((g.headis & 1) != 0 && g.hname != NULL) {
3513*58d602d3Smrg pre = justname(g.inf) - g.inf;
3514*58d602d3Smrg to = justname(g.hname);
3515*58d602d3Smrg len = strlen(to);
3516e25fdb51Smrg }
3517*58d602d3Smrg /* for -d or -dNn, replace abbreviated suffixes */
3518*58d602d3Smrg else if (strcmp(to + len, ".tgz") == 0)
3519*58d602d3Smrg sufx = ".tar";
3520*58d602d3Smrg }
3521*58d602d3Smrg else
3522*58d602d3Smrg /* add appropriate suffix when compressing */
3523*58d602d3Smrg sufx = g.sufx;
3524cf2fd8adStls
3525e25fdb51Smrg /* create output file and open to write */
3526*58d602d3Smrg g.outf = MALLOC(pre + len + strlen(sufx) + 1);
3527cf2fd8adStls if (g.outf == NULL)
3528e25fdb51Smrg bail("not enough memory", "");
3529*58d602d3Smrg memcpy(g.outf, g.inf, pre);
3530*58d602d3Smrg memcpy(g.outf + pre, to, len);
3531*58d602d3Smrg strcpy(g.outf + pre + len, sufx);
3532cf2fd8adStls g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY |
3533cf2fd8adStls (g.force ? 0 : O_EXCL), 0600);
3534e25fdb51Smrg
3535e25fdb51Smrg /* if exists and not -f, give user a chance to overwrite */
3536cf2fd8adStls if (g.outd < 0 && errno == EEXIST && isatty(0) && g.verbosity) {
3537e25fdb51Smrg int ch, reply;
3538e25fdb51Smrg
3539cf2fd8adStls fprintf(stderr, "%s exists -- overwrite (y/n)? ", g.outf);
3540e25fdb51Smrg fflush(stderr);
3541e25fdb51Smrg reply = -1;
3542e25fdb51Smrg do {
3543e25fdb51Smrg ch = getchar();
3544e25fdb51Smrg if (reply < 0 && ch != ' ' && ch != '\t')
3545e25fdb51Smrg reply = ch == 'y' || ch == 'Y' ? 1 : 0;
3546e25fdb51Smrg } while (ch != EOF && ch != '\n' && ch != '\r');
3547e25fdb51Smrg if (reply == 1)
3548cf2fd8adStls g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY,
3549cf2fd8adStls 0600);
3550e25fdb51Smrg }
3551e25fdb51Smrg
3552e25fdb51Smrg /* if exists and no overwrite, report and go on to next */
3553cf2fd8adStls if (g.outd < 0 && errno == EEXIST) {
3554cf2fd8adStls complain("%s exists -- skipping", g.outf);
3555cf2fd8adStls RELEASE(g.outf);
3556cf2fd8adStls RELEASE(g.hname);
3557cf2fd8adStls if (g.ind != 0)
3558cf2fd8adStls close(g.ind);
3559e25fdb51Smrg return;
3560e25fdb51Smrg }
3561e25fdb51Smrg
3562e25fdb51Smrg /* if some other error, give up */
3563cf2fd8adStls if (g.outd < 0)
3564cf2fd8adStls bail("write error on ", g.outf);
3565e25fdb51Smrg }
3566cf2fd8adStls SET_BINARY_MODE(g.outd);
3567cf2fd8adStls RELEASE(g.hname);
3568e25fdb51Smrg
3569e25fdb51Smrg /* process ind to outd */
3570cf2fd8adStls if (g.verbosity > 1)
3571cf2fd8adStls fprintf(stderr, "%s to %s ", g.inf, g.outf);
3572cf2fd8adStls if (g.decode) {
3573e25fdb51Smrg if (method == 8)
3574e25fdb51Smrg infchk();
3575cf2fd8adStls else if (method == 257)
3576e25fdb51Smrg unlzw();
3577cf2fd8adStls else
3578cf2fd8adStls cat();
3579e25fdb51Smrg }
3580e25fdb51Smrg #ifndef NOTHREAD
3581cf2fd8adStls else if (g.procs > 1)
3582e25fdb51Smrg parallel_compress();
3583e25fdb51Smrg #endif
3584e25fdb51Smrg else
3585e25fdb51Smrg single_compress(0);
3586cf2fd8adStls if (g.verbosity > 1) {
3587e25fdb51Smrg putc('\n', stderr);
3588e25fdb51Smrg fflush(stderr);
3589e25fdb51Smrg }
3590e25fdb51Smrg
3591e25fdb51Smrg /* finish up, copy attributes, set times, delete original */
3592cf2fd8adStls if (g.ind != 0)
3593cf2fd8adStls close(g.ind);
3594cf2fd8adStls if (g.outd != 1) {
3595cf2fd8adStls if (close(g.outd))
3596cf2fd8adStls bail("write error on ", g.outf);
3597cf2fd8adStls g.outd = -1; /* now prevent deletion on interrupt */
3598cf2fd8adStls if (g.ind != 0) {
3599cf2fd8adStls copymeta(g.inf, g.outf);
3600cf2fd8adStls if (!g.keep)
3601cf2fd8adStls unlink(g.inf);
3602e25fdb51Smrg }
3603cf2fd8adStls if (g.decode && (g.headis & 2) != 0 && g.stamp)
3604cf2fd8adStls touch(g.outf, g.stamp);
3605e25fdb51Smrg }
3606cf2fd8adStls RELEASE(g.outf);
3607e25fdb51Smrg }
3608e25fdb51Smrg
3609e25fdb51Smrg local char *helptext[] = {
3610e25fdb51Smrg "Usage: pigz [options] [files ...]",
3611e25fdb51Smrg " will compress files in place, adding the suffix '.gz'. If no files are",
3612e25fdb51Smrg #ifdef NOTHREAD
3613e25fdb51Smrg " specified, stdin will be compressed to stdout. pigz does what gzip does.",
3614e25fdb51Smrg #else
3615e25fdb51Smrg " specified, stdin will be compressed to stdout. pigz does what gzip does,",
3616e25fdb51Smrg " but spreads the work over multiple processors and cores when compressing.",
3617e25fdb51Smrg #endif
3618e25fdb51Smrg "",
3619e25fdb51Smrg "Options:",
3620cf2fd8adStls " -0 to -9, -11 Compression level (11 is much slower, a few % better)",
3621cf2fd8adStls " --fast, --best Compression levels 1 and 9 respectively",
3622e25fdb51Smrg " -b, --blocksize mmm Set compression block size to mmmK (default 128K)",
3623cf2fd8adStls " -c, --stdout Write all processed output to stdout (won't delete)",
3624cf2fd8adStls " -d, --decompress Decompress the compressed input",
3625cf2fd8adStls " -f, --force Force overwrite, compress .gz, links, and to terminal",
3626cf2fd8adStls " -F --first Do iterations first, before block split for -11",
3627cf2fd8adStls " -h, --help Display a help screen and quit",
3628cf2fd8adStls " -i, --independent Compress blocks independently for damage recovery",
3629cf2fd8adStls " -I, --iterations n Number of iterations for -11 optimization",
3630cf2fd8adStls " -k, --keep Do not delete original file after processing",
3631cf2fd8adStls " -K, --zip Compress to PKWare zip (.zip) single entry format",
3632cf2fd8adStls " -l, --list List the contents of the compressed input",
3633cf2fd8adStls " -L, --license Display the pigz license and quit",
3634cf2fd8adStls " -M, --maxsplits n Maximum number of split blocks for -11",
3635cf2fd8adStls " -n, --no-name Do not store or restore file name in/from header",
3636cf2fd8adStls " -N, --name Store/restore file name and mod time in/from header",
3637cf2fd8adStls " -O --oneblock Do not split into smaller blocks for -11",
3638e25fdb51Smrg #ifndef NOTHREAD
3639e25fdb51Smrg " -p, --processes n Allow up to n compression threads (default is the",
3640e25fdb51Smrg " number of online processors, or 8 if unknown)",
3641e25fdb51Smrg #endif
3642e25fdb51Smrg " -q, --quiet Print no messages, even on error",
3643cf2fd8adStls " -r, --recursive Process the contents of all subdirectories",
3644cf2fd8adStls " -R, --rsyncable Input-determined block locations for rsync",
3645cf2fd8adStls " -S, --suffix .sss Use suffix .sss instead of .gz (for compression)",
3646cf2fd8adStls " -t, --test Test the integrity of the compressed input",
3647cf2fd8adStls " -T, --no-time Do not store or restore mod time in/from header",
3648e25fdb51Smrg #ifdef DEBUG
3649cf2fd8adStls " -v, --verbose Provide more verbose output (-vv to debug)",
3650e25fdb51Smrg #else
3651cf2fd8adStls " -v, --verbose Provide more verbose output",
3652e25fdb51Smrg #endif
3653cf2fd8adStls " -V --version Show the version of pigz",
3654cf2fd8adStls " -z, --zlib Compress to zlib (.zz) instead of gzip format",
3655cf2fd8adStls " -- All arguments after \"--\" are treated as files"
3656e25fdb51Smrg };
3657e25fdb51Smrg
3658e25fdb51Smrg /* display the help text above */
help(void)3659e25fdb51Smrg local void help(void)
3660e25fdb51Smrg {
3661e25fdb51Smrg int n;
3662e25fdb51Smrg
3663cf2fd8adStls if (g.verbosity == 0)
3664e25fdb51Smrg return;
3665e25fdb51Smrg for (n = 0; n < (int)(sizeof(helptext) / sizeof(char *)); n++)
3666e25fdb51Smrg fprintf(stderr, "%s\n", helptext[n]);
3667e25fdb51Smrg fflush(stderr);
3668e25fdb51Smrg exit(0);
3669e25fdb51Smrg }
3670e25fdb51Smrg
3671e25fdb51Smrg #ifndef NOTHREAD
3672e25fdb51Smrg
3673e25fdb51Smrg /* try to determine the number of processors */
nprocs(int n)3674e25fdb51Smrg local int nprocs(int n)
3675e25fdb51Smrg {
3676e25fdb51Smrg # ifdef _SC_NPROCESSORS_ONLN
3677e25fdb51Smrg n = (int)sysconf(_SC_NPROCESSORS_ONLN);
3678e25fdb51Smrg # else
3679e25fdb51Smrg # ifdef _SC_NPROC_ONLN
3680e25fdb51Smrg n = (int)sysconf(_SC_NPROC_ONLN);
3681cf2fd8adStls # else
3682cf2fd8adStls # ifdef __hpux
3683cf2fd8adStls struct pst_dynamic psd;
3684cf2fd8adStls
3685cf2fd8adStls if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1)
3686cf2fd8adStls n = psd.psd_proc_cnt;
3687cf2fd8adStls # endif
3688e25fdb51Smrg # endif
3689e25fdb51Smrg # endif
3690e25fdb51Smrg return n;
3691e25fdb51Smrg }
3692e25fdb51Smrg
3693e25fdb51Smrg #endif
3694e25fdb51Smrg
3695e25fdb51Smrg /* set option defaults */
defaults(void)3696e25fdb51Smrg local void defaults(void)
3697e25fdb51Smrg {
3698cf2fd8adStls g.level = Z_DEFAULT_COMPRESSION;
3699cf2fd8adStls /* default zopfli options as set by ZopfliInitOptions():
3700cf2fd8adStls verbose = 0
3701cf2fd8adStls numiterations = 15
3702cf2fd8adStls blocksplitting = 1
3703cf2fd8adStls blocksplittinglast = 0
3704cf2fd8adStls blocksplittingmax = 15
3705cf2fd8adStls */
3706cf2fd8adStls ZopfliInitOptions(&g.zopts);
3707e25fdb51Smrg #ifdef NOTHREAD
3708cf2fd8adStls g.procs = 1;
3709e25fdb51Smrg #else
3710cf2fd8adStls g.procs = nprocs(8);
3711e25fdb51Smrg #endif
3712cf2fd8adStls g.block = 131072UL; /* 128K */
3713cf2fd8adStls g.rsync = 0; /* don't do rsync blocking */
3714cf2fd8adStls g.setdict = 1; /* initialize dictionary each thread */
3715cf2fd8adStls g.verbosity = 1; /* normal message level */
3716cf2fd8adStls g.headis = 3; /* store/restore name and timestamp */
3717cf2fd8adStls g.pipeout = 0; /* don't force output to stdout */
3718cf2fd8adStls g.sufx = ".gz"; /* compressed file suffix */
3719cf2fd8adStls g.decode = 0; /* compress */
3720cf2fd8adStls g.list = 0; /* compress */
3721cf2fd8adStls g.keep = 0; /* delete input file once compressed */
3722cf2fd8adStls g.force = 0; /* don't overwrite, don't compress links */
3723cf2fd8adStls g.recurse = 0; /* don't go into directories */
3724cf2fd8adStls g.form = 0; /* use gzip format */
3725e25fdb51Smrg }
3726e25fdb51Smrg
3727e25fdb51Smrg /* long options conversion to short options */
3728e25fdb51Smrg local char *longopts[][2] = {
3729e25fdb51Smrg {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"},
3730cf2fd8adStls {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"first", "F"},
3731cf2fd8adStls {"force", "f"}, {"help", "h"}, {"independent", "i"}, {"iterations", "I"},
3732cf2fd8adStls {"keep", "k"}, {"license", "L"}, {"list", "l"}, {"maxsplits", "M"},
3733cf2fd8adStls {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, {"oneblock", "O"},
3734e25fdb51Smrg {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"}, {"rsyncable", "R"},
3735e25fdb51Smrg {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, {"test", "t"},
3736e25fdb51Smrg {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"},
3737e25fdb51Smrg {"version", "V"}, {"zip", "K"}, {"zlib", "z"}};
3738e25fdb51Smrg #define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1))
3739e25fdb51Smrg
3740e25fdb51Smrg /* either new buffer size, new compression level, or new number of processes --
3741e25fdb51Smrg get rid of old buffers and threads to force the creation of new ones with
3742e25fdb51Smrg the new settings */
new_opts(void)3743e25fdb51Smrg local void new_opts(void)
3744e25fdb51Smrg {
3745e25fdb51Smrg single_compress(1);
3746e25fdb51Smrg #ifndef NOTHREAD
3747e25fdb51Smrg finish_jobs();
3748e25fdb51Smrg #endif
3749e25fdb51Smrg }
3750e25fdb51Smrg
3751e25fdb51Smrg /* verify that arg is only digits, and if so, return the decimal value */
num(char * arg)3752e25fdb51Smrg local size_t num(char *arg)
3753e25fdb51Smrg {
3754e25fdb51Smrg char *str = arg;
3755e25fdb51Smrg size_t val = 0;
3756e25fdb51Smrg
3757e25fdb51Smrg if (*str == 0)
3758e25fdb51Smrg bail("internal error: empty parameter", "");
3759e25fdb51Smrg do {
3760cf2fd8adStls if (*str < '0' || *str > '9' ||
3761cf2fd8adStls (val && ((~(size_t)0) - (*str - '0')) / val < 10))
3762e25fdb51Smrg bail("invalid numeric parameter: ", arg);
3763e25fdb51Smrg val = val * 10 + (*str - '0');
3764e25fdb51Smrg } while (*++str);
3765e25fdb51Smrg return val;
3766e25fdb51Smrg }
3767e25fdb51Smrg
3768e25fdb51Smrg /* process an option, return true if a file name and not an option */
option(char * arg)3769e25fdb51Smrg local int option(char *arg)
3770e25fdb51Smrg {
3771e25fdb51Smrg static int get = 0; /* if not zero, look for option parameter */
3772e25fdb51Smrg char bad[3] = "-X"; /* for error messages (X is replaced) */
3773e25fdb51Smrg
3774e25fdb51Smrg /* if no argument or dash option, check status of get */
3775e25fdb51Smrg if (get && (arg == NULL || *arg == '-')) {
3776cf2fd8adStls bad[1] = "bpSIM"[get - 1];
3777e25fdb51Smrg bail("missing parameter after ", bad);
3778e25fdb51Smrg }
3779e25fdb51Smrg if (arg == NULL)
3780e25fdb51Smrg return 0;
3781e25fdb51Smrg
3782e25fdb51Smrg /* process long option or short options */
3783e25fdb51Smrg if (*arg == '-') {
3784e25fdb51Smrg /* a single dash will be interpreted as stdin */
3785e25fdb51Smrg if (*++arg == 0)
3786e25fdb51Smrg return 1;
3787e25fdb51Smrg
3788e25fdb51Smrg /* process long option (fall through with equivalent short option) */
3789e25fdb51Smrg if (*arg == '-') {
3790e25fdb51Smrg int j;
3791e25fdb51Smrg
3792e25fdb51Smrg arg++;
3793e25fdb51Smrg for (j = NLOPTS - 1; j >= 0; j--)
3794e25fdb51Smrg if (strcmp(arg, longopts[j][0]) == 0) {
3795e25fdb51Smrg arg = longopts[j][1];
3796e25fdb51Smrg break;
3797e25fdb51Smrg }
3798e25fdb51Smrg if (j < 0)
3799e25fdb51Smrg bail("invalid option: ", arg - 2);
3800e25fdb51Smrg }
3801e25fdb51Smrg
3802e25fdb51Smrg /* process short options (more than one allowed after dash) */
3803e25fdb51Smrg do {
3804e25fdb51Smrg /* if looking for a parameter, don't process more single character
3805e25fdb51Smrg options until we have the parameter */
3806e25fdb51Smrg if (get) {
3807e25fdb51Smrg if (get == 3)
3808e25fdb51Smrg bail("invalid usage: -s must be followed by space", "");
3809e25fdb51Smrg break; /* allow -pnnn and -bnnn, fall to parameter code */
3810e25fdb51Smrg }
3811e25fdb51Smrg
3812cf2fd8adStls /* process next single character option or compression level */
3813e25fdb51Smrg bad[1] = *arg;
3814e25fdb51Smrg switch (*arg) {
3815e25fdb51Smrg case '0': case '1': case '2': case '3': case '4':
3816e25fdb51Smrg case '5': case '6': case '7': case '8': case '9':
3817cf2fd8adStls g.level = *arg - '0';
3818cf2fd8adStls while (arg[1] >= '0' && arg[1] <= '9') {
3819cf2fd8adStls if (g.level && (INT_MAX - (arg[1] - '0')) / g.level < 10)
3820cf2fd8adStls bail("only levels 0..9 and 11 are allowed", "");
3821cf2fd8adStls g.level = g.level * 10 + *++arg - '0';
3822cf2fd8adStls }
3823cf2fd8adStls if (g.level == 10 || g.level > 11)
3824cf2fd8adStls bail("only levels 0..9 and 11 are allowed", "");
3825e25fdb51Smrg new_opts();
3826e25fdb51Smrg break;
3827cf2fd8adStls case 'F': g.zopts.blocksplittinglast = 1; break;
3828cf2fd8adStls case 'I': get = 4; break;
3829cf2fd8adStls case 'K': g.form = 2; g.sufx = ".zip"; break;
3830e25fdb51Smrg case 'L':
3831e25fdb51Smrg fputs(VERSION, stderr);
3832cf2fd8adStls fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013"
3833cf2fd8adStls " Mark Adler\n",
3834e25fdb51Smrg stderr);
3835e25fdb51Smrg fputs("Subject to the terms of the zlib license.\n",
3836e25fdb51Smrg stderr);
3837e25fdb51Smrg fputs("No warranty is provided or implied.\n", stderr);
3838e25fdb51Smrg exit(0);
3839cf2fd8adStls case 'M': get = 5; break;
3840cf2fd8adStls case 'N': g.headis = 3; break;
3841cf2fd8adStls case 'O': g.zopts.blocksplitting = 0; break;
3842cf2fd8adStls case 'R': g.rsync = 1; break;
3843e25fdb51Smrg case 'S': get = 3; break;
3844cf2fd8adStls case 'T': g.headis &= ~2; break;
3845e25fdb51Smrg case 'V': fputs(VERSION, stderr); exit(0);
3846e25fdb51Smrg case 'Z':
3847e25fdb51Smrg bail("invalid option: LZW output not supported: ", bad);
3848e25fdb51Smrg case 'a':
3849e25fdb51Smrg bail("invalid option: ascii conversion not supported: ", bad);
3850e25fdb51Smrg case 'b': get = 1; break;
3851cf2fd8adStls case 'c': g.pipeout = 1; break;
3852cf2fd8adStls case 'd': g.decode = 1; g.headis = 0; break;
3853cf2fd8adStls case 'f': g.force = 1; break;
3854e25fdb51Smrg case 'h': help(); break;
3855cf2fd8adStls case 'i': g.setdict = 0; break;
3856cf2fd8adStls case 'k': g.keep = 1; break;
3857cf2fd8adStls case 'l': g.list = 1; break;
3858cf2fd8adStls case 'n': g.headis &= ~1; break;
3859e25fdb51Smrg case 'p': get = 2; break;
3860cf2fd8adStls case 'q': g.verbosity = 0; break;
3861cf2fd8adStls case 'r': g.recurse = 1; break;
3862cf2fd8adStls case 't': g.decode = 2; break;
3863cf2fd8adStls case 'v': g.verbosity++; break;
3864cf2fd8adStls case 'z': g.form = 1; g.sufx = ".zz"; break;
3865e25fdb51Smrg default:
3866e25fdb51Smrg bail("invalid option: ", bad);
3867e25fdb51Smrg }
3868e25fdb51Smrg } while (*++arg);
3869e25fdb51Smrg if (*arg == 0)
3870e25fdb51Smrg return 0;
3871e25fdb51Smrg }
3872e25fdb51Smrg
3873cf2fd8adStls /* process option parameter for -b, -p, -S, -I, or -M */
3874e25fdb51Smrg if (get) {
3875e25fdb51Smrg size_t n;
3876e25fdb51Smrg
3877e25fdb51Smrg if (get == 1) {
3878e25fdb51Smrg n = num(arg);
3879cf2fd8adStls g.block = n << 10; /* chunk size */
3880cf2fd8adStls if (g.block < DICT)
3881e25fdb51Smrg bail("block size too small (must be >= 32K)", "");
3882cf2fd8adStls if (n != g.block >> 10 ||
3883cf2fd8adStls OUTPOOL(g.block) < g.block ||
3884cf2fd8adStls (ssize_t)OUTPOOL(g.block) < 0 ||
3885cf2fd8adStls g.block > (1UL << 22))
3886e25fdb51Smrg bail("block size too large: ", arg);
3887e25fdb51Smrg new_opts();
3888e25fdb51Smrg }
3889e25fdb51Smrg else if (get == 2) {
3890e25fdb51Smrg n = num(arg);
3891cf2fd8adStls g.procs = (int)n; /* # processes */
3892cf2fd8adStls if (g.procs < 1)
3893e25fdb51Smrg bail("invalid number of processes: ", arg);
3894cf2fd8adStls if ((size_t)g.procs != n || INBUFS(g.procs) < 1)
3895e25fdb51Smrg bail("too many processes: ", arg);
3896e25fdb51Smrg #ifdef NOTHREAD
3897cf2fd8adStls if (g.procs > 1)
3898cf2fd8adStls bail("compiled without threads", "");
3899e25fdb51Smrg #endif
3900e25fdb51Smrg new_opts();
3901e25fdb51Smrg }
3902e25fdb51Smrg else if (get == 3)
3903cf2fd8adStls g.sufx = arg; /* gz suffix */
3904cf2fd8adStls else if (get == 4)
3905cf2fd8adStls g.zopts.numiterations = num(arg); /* optimization iterations */
3906cf2fd8adStls else if (get == 5)
3907cf2fd8adStls g.zopts.blocksplittingmax = num(arg); /* max block splits */
3908e25fdb51Smrg get = 0;
3909e25fdb51Smrg return 0;
3910e25fdb51Smrg }
3911e25fdb51Smrg
3912e25fdb51Smrg /* neither an option nor parameter */
3913e25fdb51Smrg return 1;
3914e25fdb51Smrg }
3915e25fdb51Smrg
3916e25fdb51Smrg /* catch termination signal */
cut_short(int sig)3917e25fdb51Smrg local void cut_short(int sig)
3918e25fdb51Smrg {
3919e25fdb51Smrg (void)sig;
3920e25fdb51Smrg Trace(("termination by user"));
3921cf2fd8adStls if (g.outd != -1 && g.outf != NULL)
3922cf2fd8adStls unlink(g.outf);
3923e25fdb51Smrg log_dump();
3924e25fdb51Smrg _exit(1);
3925e25fdb51Smrg }
3926e25fdb51Smrg
3927e25fdb51Smrg /* Process arguments, compress in the gzip format. Note that procs must be at
3928e25fdb51Smrg least two in order to provide a dictionary in one work unit for the other
3929e25fdb51Smrg work unit, and that size must be at least 32K to store a full dictionary. */
main(int argc,char ** argv)3930e25fdb51Smrg int main(int argc, char **argv)
3931e25fdb51Smrg {
3932e25fdb51Smrg int n; /* general index */
3933cf2fd8adStls int noop; /* true to suppress option decoding */
3934e25fdb51Smrg unsigned long done; /* number of named files processed */
3935e25fdb51Smrg char *opts, *p; /* environment default options, marker */
3936e25fdb51Smrg
3937cf2fd8adStls /* initialize globals */
3938cf2fd8adStls g.outf = NULL;
3939cf2fd8adStls g.first = 1;
3940cf2fd8adStls g.warned = 0;
3941cf2fd8adStls g.hname = NULL;
3942cf2fd8adStls
3943cf2fd8adStls /* save pointer to program name for error messages */
3944cf2fd8adStls p = strrchr(argv[0], '/');
3945cf2fd8adStls p = p == NULL ? argv[0] : p + 1;
3946cf2fd8adStls g.prog = *p ? p : "pigz";
3947cf2fd8adStls
3948e25fdb51Smrg /* prepare for interrupts and logging */
3949e25fdb51Smrg signal(SIGINT, cut_short);
3950e25fdb51Smrg #ifndef NOTHREAD
3951cf2fd8adStls yarn_prefix = g.prog; /* prefix for yarn error messages */
3952e25fdb51Smrg yarn_abort = cut_short; /* call on thread error */
3953e25fdb51Smrg #endif
3954e25fdb51Smrg #ifdef DEBUG
3955e25fdb51Smrg gettimeofday(&start, NULL); /* starting time for log entries */
3956e25fdb51Smrg log_init(); /* initialize logging */
3957e25fdb51Smrg #endif
3958e25fdb51Smrg
3959e25fdb51Smrg /* set all options to defaults */
3960e25fdb51Smrg defaults();
3961e25fdb51Smrg
3962cf2fd8adStls /* process user environment variable defaults in GZIP */
3963e25fdb51Smrg opts = getenv("GZIP");
3964e25fdb51Smrg if (opts != NULL) {
3965e25fdb51Smrg while (*opts) {
3966e25fdb51Smrg while (*opts == ' ' || *opts == '\t')
3967e25fdb51Smrg opts++;
3968e25fdb51Smrg p = opts;
3969e25fdb51Smrg while (*p && *p != ' ' && *p != '\t')
3970e25fdb51Smrg p++;
3971e25fdb51Smrg n = *p;
3972e25fdb51Smrg *p = 0;
3973e25fdb51Smrg if (option(opts))
3974e25fdb51Smrg bail("cannot provide files in GZIP environment variable", "");
3975e25fdb51Smrg opts = p + (n ? 1 : 0);
3976e25fdb51Smrg }
3977e25fdb51Smrg option(NULL);
3978e25fdb51Smrg }
3979e25fdb51Smrg
3980cf2fd8adStls /* process user environment variable defaults in PIGZ as well */
3981cf2fd8adStls opts = getenv("PIGZ");
3982cf2fd8adStls if (opts != NULL) {
3983cf2fd8adStls while (*opts) {
3984cf2fd8adStls while (*opts == ' ' || *opts == '\t')
3985cf2fd8adStls opts++;
3986cf2fd8adStls p = opts;
3987cf2fd8adStls while (*p && *p != ' ' && *p != '\t')
3988cf2fd8adStls p++;
3989cf2fd8adStls n = *p;
3990cf2fd8adStls *p = 0;
3991cf2fd8adStls if (option(opts))
3992cf2fd8adStls bail("cannot provide files in PIGZ environment variable", "");
3993cf2fd8adStls opts = p + (n ? 1 : 0);
3994cf2fd8adStls }
3995cf2fd8adStls option(NULL);
3996cf2fd8adStls }
3997cf2fd8adStls
3998cf2fd8adStls /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */
3999cf2fd8adStls if (strcmp(g.prog, "unpigz") == 0 || strcmp(g.prog, "gunzip") == 0)
4000cf2fd8adStls g.decode = 1, g.headis = 0;
4001cf2fd8adStls if ((n = strlen(g.prog)) > 2 && strcmp(g.prog + n - 3, "cat") == 0)
4002cf2fd8adStls g.decode = 1, g.headis = 0, g.pipeout = 1;
4003cf2fd8adStls
4004cf2fd8adStls /* if no arguments and compressed data to or from a terminal, show help */
4005cf2fd8adStls if (argc < 2 && isatty(g.decode ? 0 : 1))
4006e25fdb51Smrg help();
4007e25fdb51Smrg
4008cf2fd8adStls /* process command-line arguments, no options after "--" */
4009cf2fd8adStls done = noop = 0;
4010e25fdb51Smrg for (n = 1; n < argc; n++)
4011cf2fd8adStls if (noop == 0 && strcmp(argv[n], "--") == 0) {
4012cf2fd8adStls noop = 1;
4013cf2fd8adStls option(NULL);
4014e25fdb51Smrg }
4015cf2fd8adStls else if (noop || option(argv[n])) { /* true if file name, process it */
4016cf2fd8adStls if (done == 1 && g.pipeout && !g.decode && !g.list && g.form > 1)
4017cf2fd8adStls complain("warning: output will be concatenated zip files -- "
4018cf2fd8adStls "will not be able to extract");
4019e25fdb51Smrg process(strcmp(argv[n], "-") ? argv[n] : NULL);
4020e25fdb51Smrg done++;
4021e25fdb51Smrg }
4022e25fdb51Smrg option(NULL);
4023e25fdb51Smrg
4024e25fdb51Smrg /* list stdin or compress stdin to stdout if no file names provided */
4025e25fdb51Smrg if (done == 0)
4026e25fdb51Smrg process(NULL);
4027e25fdb51Smrg
4028e25fdb51Smrg /* done -- release resources, show log */
4029e25fdb51Smrg new_opts();
4030e25fdb51Smrg log_dump();
4031cf2fd8adStls return g.warned ? 2 : 0;
4032e25fdb51Smrg }
4033