1 #ifdef WIN32
2
3 #include <windows.h>
4 #include <stdio.h>
5 #include <tchar.h>
6 #include "zip/zip.h"
7
8
9 // THIS FILE is almost entirely based upon code by info-zip.
10 // It has been modified by Lucian Wischik. The modifications
11 // were a complete rewrite of the bit of code that generates the
12 // layout of the zipfile, and support for zipping to/from memory
13 // or handles or pipes or pagefile or diskfiles, encryption, unicode.
14 // The original code may be found at http://www.info-zip.org
15 // The original copyright text follows.
16 //
17 //
18 //
19 // This is version 1999-Oct-05 of the Info-ZIP copyright and license.
20 // The definitive version of this document should be available at
21 // ftp://ftp.cdrom.com/pub/infozip/license.html indefinitely.
22 //
23 // Copyright (c) 1990-1999 Info-ZIP. All rights reserved.
24 //
25 // For the purposes of this copyright and license, "Info-ZIP" is defined as
26 // the following set of individuals:
27 //
28 // Mark Adler, John Bush, Karl Davis, Harald Denker, Jean-Michel Dubois,
29 // Jean-loup Gailly, Hunter Goatley, Ian Gorman, Chris Herborth, Dirk Haase,
30 // Greg Hartwig, Robert Heath, Jonathan Hudson, Paul Kienitz, David Kirschbaum,
31 // Johnny Lee, Onno van der Linden, Igor Mandrichenko, Steve P. Miller,
32 // Sergio Monesi, Keith Owens, George Petrov, Greg Roelofs, Kai Uwe Rommel,
33 // Steve Salisbury, Dave Smith, Christian Spieler, Antoine Verheijen,
34 // Paul von Behren, Rich Wales, Mike White
35 //
36 // This software is provided "as is," without warranty of any kind, express
37 // or implied. In no event shall Info-ZIP or its contributors be held liable
38 // for any direct, indirect, incidental, special or consequential damages
39 // arising out of the use of or inability to use this software.
40 //
41 // Permission is granted to anyone to use this software for any purpose,
42 // including commercial applications, and to alter it and redistribute it
43 // freely, subject to the following restrictions:
44 //
45 // 1. Redistributions of source code must retain the above copyright notice,
46 // definition, disclaimer, and this list of conditions.
47 //
48 // 2. Redistributions in binary form must reproduce the above copyright
49 // notice, definition, disclaimer, and this list of conditions in
50 // documentation and/or other materials provided with the distribution.
51 //
52 // 3. Altered versions--including, but not limited to, ports to new operating
53 // systems, existing ports with new graphical interfaces, and dynamic,
54 // shared, or static library versions--must be plainly marked as such
55 // and must not be misrepresented as being the original source. Such
56 // altered versions also must not be misrepresented as being Info-ZIP
57 // releases--including, but not limited to, labeling of the altered
58 // versions with the names "Info-ZIP" (or any variation thereof, including,
59 // but not limited to, different capitalizations), "Pocket UnZip," "WiZ"
60 // or "MacZip" without the explicit permission of Info-ZIP. Such altered
61 // versions are further prohibited from misrepresentative use of the
62 // Zip-Bugs or Info-ZIP e-mail addresses or of the Info-ZIP URL(s).
63 //
64 // 4. Info-ZIP retains the right to use the names "Info-ZIP," "Zip," "UnZip,"
65 // "WiZ," "Pocket UnZip," "Pocket Zip," and "MacZip" for its own source and
66 // binary releases.
67 //
68
69
70 typedef unsigned char uch; // unsigned 8-bit value
71 typedef unsigned short ush; // unsigned 16-bit value
72 typedef unsigned long ulg; // unsigned 32-bit value
73 typedef size_t extent; // file size
74 typedef unsigned Pos; // must be at least 32 bits
75 typedef unsigned IPos; // A Pos is an index in the character window. Pos is used only for parameter passing
76
77 #ifndef EOF
78 #define EOF (-1)
79 #endif
80
81
82 // Error return values. The values 0..4 and 12..18 follow the conventions
83 // of PKZIP. The values 4..10 are all assigned to "insufficient memory"
84 // by PKZIP, so the codes 5..10 are used here for other purposes.
85 #define ZE_MISS -1 // used by procname(), zipbare()
86 #define ZE_OK 0 // success
87 #define ZE_EOF 2 // unexpected end of zip file
88 #define ZE_FORM 3 // zip file structure error
89 #define ZE_MEM 4 // out of memory
90 #define ZE_LOGIC 5 // internal logic error
91 #define ZE_BIG 6 // entry too large to split
92 #define ZE_NOTE 7 // invalid comment format
93 #define ZE_TEST 8 // zip test (-T) failed or out of memory
94 #define ZE_ABORT 9 // user interrupt or termination
95 #define ZE_TEMP 10 // error using a temp file
96 #define ZE_READ 11 // read or seek error
97 #define ZE_NONE 12 // nothing to do
98 #define ZE_NAME 13 // missing or empty zip file
99 #define ZE_WRITE 14 // error writing to a file
100 #define ZE_CREAT 15 // couldn't open to write
101 #define ZE_PARMS 16 // bad command line
102 #define ZE_OPEN 18 // could not open a specified file to read
103 #define ZE_MAXERR 18 // the highest error number
104
105
106 // internal file attribute
107 #define UNKNOWN (-1)
108 #define BINARY 0
109 #define ASCII 1
110
111 #define BEST -1 // Use best method (deflation or store)
112 #define STORE 0 // Store method
113 #define DEFLATE 8 // Deflation method
114
115 #define CRCVAL_INITIAL 0L
116
117 // MSDOS file or directory attributes
118 #define MSDOS_HIDDEN_ATTR 0x02
119 #define MSDOS_DIR_ATTR 0x10
120
121 // Lengths of headers after signatures in bytes
122 #define LOCHEAD 26
123 #define CENHEAD 42
124 #define ENDHEAD 18
125
126 // Definitions for extra field handling:
127 #define EB_HEADSIZE 4 /* length of a extra field block header */
128 #define EB_LEN 2 /* offset of data length field in header */
129 #define EB_UT_MINLEN 1 /* minimal UT field contains Flags byte */
130 #define EB_UT_FLAGS 0 /* byte offset of Flags field */
131 #define EB_UT_TIME1 1 /* byte offset of 1st time value */
132 #define EB_UT_FL_MTIME (1 << 0) /* mtime present */
133 #define EB_UT_FL_ATIME (1 << 1) /* atime present */
134 #define EB_UT_FL_CTIME (1 << 2) /* ctime present */
135 #define EB_UT_LEN(n) (EB_UT_MINLEN + 4 * (n))
136 #define EB_L_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(3))
137 #define EB_C_UT_SIZE (EB_HEADSIZE + EB_UT_LEN(1))
138
139
140 // Macros for writing machine integers to little-endian format
141 #define PUTSH(a,f) {char _putsh_c=(char)((a)&0xff); wfunc(param,&_putsh_c,1); _putsh_c=(char)((a)>>8); wfunc(param,&_putsh_c,1);}
142 #define PUTLG(a,f) {PUTSH((a) & 0xffff,(f)) PUTSH((a) >> 16,(f))}
143
144
145 // -- Structure of a ZIP file --
146 // Signatures for zip file information headers
147 #define LOCSIG 0x04034b50L
148 #define CENSIG 0x02014b50L
149 #define ENDSIG 0x06054b50L
150 #define EXTLOCSIG 0x08074b50L
151
152
153 #define MIN_MATCH 3
154 #define MAX_MATCH 258
155 // The minimum and maximum match lengths
156
157
158 #define WSIZE (0x8000)
159 // Maximum window size = 32K. If you are really short of memory, compile
160 // with a smaller WSIZE but this reduces the compression ratio for files
161 // of size > WSIZE. WSIZE must be a power of two in the current implementation.
162 //
163
164 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
165 // Minimum amount of lookahead, except at the end of the input file.
166 // See deflate.c for comments about the MIN_MATCH+1.
167 //
168
169 #define MAX_DIST (WSIZE-MIN_LOOKAHEAD)
170 // In order to simplify the code, particularly on 16 bit machines, match
171 // distances are limited to MAX_DIST instead of WSIZE.
172 //
173
174
175 #define ZIP_HANDLE 1
176 #define ZIP_FILENAME 2
177 #define ZIP_MEMORY 3
178 #define ZIP_FOLDER 4
179
180
181
182 // ===========================================================================
183 // Constants
184 //
185
186 #define MAX_BITS 15
187 // All codes must not exceed MAX_BITS bits
188
189 #define MAX_BL_BITS 7
190 // Bit length codes must not exceed MAX_BL_BITS bits
191
192 #define LENGTH_CODES 29
193 // number of length codes, not counting the special END_BLOCK code
194
195 #define LITERALS 256
196 // number of literal bytes 0..255
197
198 #define END_BLOCK 256
199 // end of block literal code
200
201 #define L_CODES (LITERALS+1+LENGTH_CODES)
202 // number of Literal or Length codes, including the END_BLOCK code
203
204 #define D_CODES 30
205 // number of distance codes
206
207 #define BL_CODES 19
208 // number of codes used to transfer the bit lengths
209
210
211 #define STORED_BLOCK 0
212 #define STATIC_TREES 1
213 #define DYN_TREES 2
214 // The three kinds of block type
215
216 #define LIT_BUFSIZE 0x8000
217 #define DIST_BUFSIZE LIT_BUFSIZE
218 // Sizes of match buffers for literals/lengths and distances. There are
219 // 4 reasons for limiting LIT_BUFSIZE to 64K:
220 // - frequencies can be kept in 16 bit counters
221 // - if compression is not successful for the first block, all input data is
222 // still in the window so we can still emit a stored block even when input
223 // comes from standard input. (This can also be done for all blocks if
224 // LIT_BUFSIZE is not greater than 32K.)
225 // - if compression is not successful for a file smaller than 64K, we can
226 // even emit a stored file instead of a stored block (saving 5 bytes).
227 // - creating new Huffman trees less frequently may not provide fast
228 // adaptation to changes in the input data statistics. (Take for
229 // example a binary file with poorly compressible code followed by
230 // a highly compressible string table.) Smaller buffer sizes give
231 // fast adaptation but have of course the overhead of transmitting trees
232 // more frequently.
233 // - I can't count above 4
234 // The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
235 // memory at the expense of compression). Some optimizations would be possible
236 // if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
237 //
238
239 #define REP_3_6 16
240 // repeat previous bit length 3-6 times (2 bits of repeat count)
241
242 #define REPZ_3_10 17
243 // repeat a zero length 3-10 times (3 bits of repeat count)
244
245 #define REPZ_11_138 18
246 // repeat a zero length 11-138 times (7 bits of repeat count)
247
248 #define HEAP_SIZE (2*L_CODES+1)
249 // maximum heap size
250
251
252 // ===========================================================================
253 // Local data used by the "bit string" routines.
254 //
255
256 #define Buf_size (8 * 2*sizeof(char))
257 // Number of bits used within bi_buf. (bi_buf may be implemented on
258 // more than 16 bits on some systems.)
259
260 // Output a 16 bit value to the bit stream, lower (oldest) byte first
261 #define PUTSHORT(state,w) \
262 { if (state.bs.out_offset >= state.bs.out_size-1) \
263 state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \
264 state.bs.out_buf[state.bs.out_offset++] = (char) ((w) & 0xff); \
265 state.bs.out_buf[state.bs.out_offset++] = (char) ((ush)(w) >> 8); \
266 }
267
268 #define PUTBYTE(state,b) \
269 { if (state.bs.out_offset >= state.bs.out_size) \
270 state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset); \
271 state.bs.out_buf[state.bs.out_offset++] = (char) (b); \
272 }
273
274 // DEFLATE.CPP HEADER
275
276 #define HASH_BITS 15
277 // For portability to 16 bit machines, do not use values above 15.
278
279 #define HASH_SIZE (unsigned)(1<<HASH_BITS)
280 #define HASH_MASK (HASH_SIZE-1)
281 #define WMASK (WSIZE-1)
282 // HASH_SIZE and WSIZE must be powers of two
283
284 #define NIL 0
285 // Tail of hash chains
286
287 #define FAST 4
288 #define SLOW 2
289 // speed options for the general purpose bit flag
290
291 #define TOO_FAR 4096
292 // Matches of length 3 are discarded if their distance exceeds TOO_FAR
293
294
295
296 #define EQUAL 0
297 // result of memcmp for equal strings
298
299
300 // ===========================================================================
301 // Local data used by the "longest match" routines.
302
303 #define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
304 // Number of bits by which ins_h and del_h must be shifted at each
305 // input step. It must be such that after MIN_MATCH steps, the oldest
306 // byte no longer takes part in the hash key, that is:
307 // H_SHIFT * MIN_MATCH >= HASH_BITS
308
309 #define max_insert_length max_lazy_match
310 // Insert new strings in the hash table only if the match length
311 // is not greater than this length. This saves time but degrades compression.
312 // max_insert_length is used only for compression levels <= 3.
313
314
315
316 const int extra_lbits[LENGTH_CODES] // extra bits for each length code
317 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
318
319 const int extra_dbits[D_CODES] // extra bits for each distance code
320 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
321
322 const int extra_blbits[BL_CODES]// extra bits for each bit length code
323 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
324
325 const uch bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
326 // The lengths of the bit length codes are sent in order of decreasing
327 // probability, to avoid transmitting the lengths for unused bit length codes.
328
329
330 typedef struct config {
331 ush good_length; // reduce lazy search above this match length
332 ush max_lazy; // do not perform lazy search above this match length
333 ush nice_length; // quit search above this match length
334 ush max_chain;
335 } config;
336
337 // Values for max_lazy_match, good_match, nice_match and max_chain_length,
338 // depending on the desired pack level (0..9). The values given below have
339 // been tuned to exclude worst case performance for pathological files.
340 // Better values may be found for specific files.
341 //
342
343 const config configuration_table[10] = {
344 // good lazy nice chain
345 {0, 0, 0, 0}, // 0 store only
346 {4, 4, 8, 4}, // 1 maximum speed, no lazy matches
347 {4, 5, 16, 8}, // 2
348 {4, 6, 32, 32}, // 3
349 {4, 4, 16, 16}, // 4 lazy matches */
350 {8, 16, 32, 32}, // 5
351 {8, 16, 128, 128}, // 6
352 {8, 32, 128, 256}, // 7
353 {32, 128, 258, 1024}, // 8
354 {32, 258, 258, 4096}};// 9 maximum compression */
355
356 // Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
357 // For deflate_fast() (levels <= 3) good is ignored and lazy has a different meaning.
358
359
360
361
362
363
364
365 // Data structure describing a single value and its code string.
366 typedef struct ct_data {
367 union {
368 ush freq; // frequency count
369 ush code; // bit string
370 } fc;
371 union {
372 ush dad; // father node in Huffman tree
373 ush len; // length of bit string
374 } dl;
375 } ct_data;
376
377 typedef struct tree_desc {
378 ct_data *dyn_tree; // the dynamic tree
379 ct_data *static_tree; // corresponding static tree or NULL
380 const int *extra_bits; // extra bits for each code or NULL
381 int extra_base; // base index for extra_bits
382 int elems; // max number of elements in the tree
383 int max_length; // max bit length for the codes
384 int max_code; // largest code with non zero frequency
385 } tree_desc;
386
387
388
389
390 class TTreeState
391 { public:
392 TTreeState();
393
394 ct_data dyn_ltree[HEAP_SIZE]; // literal and length tree
395 ct_data dyn_dtree[2*D_CODES+1]; // distance tree
396 ct_data static_ltree[L_CODES+2]; // the static literal tree...
397 // ... Since the bit lengths are imposed, there is no need for the L_CODES
398 // extra codes used during heap construction. However the codes 286 and 287
399 // are needed to build a canonical tree (see ct_init below).
400 ct_data static_dtree[D_CODES]; // the static distance tree...
401 // ... (Actually a trivial tree since all codes use 5 bits.)
402 ct_data bl_tree[2*BL_CODES+1]; // Huffman tree for the bit lengths
403
404 tree_desc l_desc;
405 tree_desc d_desc;
406 tree_desc bl_desc;
407
408 ush bl_count[MAX_BITS+1]; // number of codes at each bit length for an optimal tree
409
410 int heap[2*L_CODES+1]; // heap used to build the Huffman trees
411 int heap_len; // number of elements in the heap
412 int heap_max; // element of largest frequency
413 // The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
414 // The same heap array is used to build all trees.
415
416 uch depth[2*L_CODES+1];
417 // Depth of each subtree used as tie breaker for trees of equal frequency
418
419 uch length_code[MAX_MATCH-MIN_MATCH+1];
420 // length code for each normalized match length (0 == MIN_MATCH)
421
422 uch dist_code[512];
423 // distance codes. The first 256 values correspond to the distances
424 // 3 .. 258, the last 256 values correspond to the top 8 bits of
425 // the 15 bit distances.
426
427 int base_length[LENGTH_CODES];
428 // First normalized length for each code (0 = MIN_MATCH)
429
430 int base_dist[D_CODES];
431 // First normalized distance for each code (0 = distance of 1)
432
433 uch far l_buf[LIT_BUFSIZE]; // buffer for literals/lengths
434 ush far d_buf[DIST_BUFSIZE]; // buffer for distances
435
436 uch flag_buf[(LIT_BUFSIZE/8)];
437 // flag_buf is a bit array distinguishing literals from lengths in
438 // l_buf, and thus indicating the presence or absence of a distance.
439
440 unsigned last_lit; // running index in l_buf
441 unsigned last_dist; // running index in d_buf
442 unsigned last_flags; // running index in flag_buf
443 uch flags; // current flags not yet saved in flag_buf
444 uch flag_bit; // current bit used in flags
445 // bits are filled in flags starting at bit 0 (least significant).
446 // Note: these flags are overkill in the current code since we don't
447 // take advantage of DIST_BUFSIZE == LIT_BUFSIZE.
448
449 ulg opt_len; // bit length of current block with optimal trees
450 ulg static_len; // bit length of current block with static trees
451
452 ulg cmpr_bytelen; // total byte length of compressed file
453 ulg cmpr_len_bits; // number of bits past 'cmpr_bytelen'
454
455 ulg input_len; // total byte length of input file
456 // input_len is for debugging only since we can get it by other means.
457
458 ush *file_type; // pointer to UNKNOWN, BINARY or ASCII
459 // int *file_method; // pointer to DEFLATE or STORE
460 };
461
TTreeState()462 TTreeState::TTreeState()
463 { tree_desc a = {dyn_ltree, static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS, 0}; l_desc = a;
464 tree_desc b = {dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0}; d_desc = b;
465 tree_desc c = {bl_tree, NULL, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0}; bl_desc = c;
466 last_lit=0;
467 last_dist=0;
468 last_flags=0;
469 }
470
471
472
473 class TBitState
474 { public:
475
476 int flush_flg;
477 //
478 unsigned bi_buf;
479 // Output buffer. bits are inserted starting at the bottom (least significant
480 // bits). The width of bi_buf must be at least 16 bits.
481 int bi_valid;
482 // Number of valid bits in bi_buf. All bits above the last valid bit
483 // are always zero.
484 char *out_buf;
485 // Current output buffer.
486 unsigned out_offset;
487 // Current offset in output buffer.
488 // On 16 bit machines, the buffer is limited to 64K.
489 unsigned out_size;
490 // Size of current output buffer
491 ulg bits_sent; // bit length of the compressed data only needed for debugging???
492 };
493
494
495
496
497
498
499
500 class TDeflateState
501 { public:
TDeflateState()502 TDeflateState() {window_size=0;}
503
504 uch window[2L*WSIZE];
505 // Sliding window. Input bytes are read into the second half of the window,
506 // and move to the first half later to keep a dictionary of at least WSIZE
507 // bytes. With this organization, matches are limited to a distance of
508 // WSIZE-MAX_MATCH bytes, but this ensures that IO is always
509 // performed with a length multiple of the block size. Also, it limits
510 // the window size to 64K, which is quite useful on MSDOS.
511 // To do: limit the window size to WSIZE+CBSZ if SMALL_MEM (the code would
512 // be less efficient since the data would have to be copied WSIZE/CBSZ times)
513 Pos prev[WSIZE];
514 // Link to older string with same hash index. To limit the size of this
515 // array to 64K, this link is maintained only for the last 32K strings.
516 // An index in this array is thus a window index modulo 32K.
517 Pos head[HASH_SIZE];
518 // Heads of the hash chains or NIL. If your compiler thinks that
519 // HASH_SIZE is a dynamic value, recompile with -DDYN_ALLOC.
520
521 ulg window_size;
522 // window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
523 // input file length plus MIN_LOOKAHEAD.
524
525 long block_start;
526 // window position at the beginning of the current output block. Gets
527 // negative when the window is moved backwards.
528
529 int sliding;
530 // Set to false when the input file is already in memory
531
532 unsigned ins_h; // hash index of string to be inserted
533
534 unsigned int prev_length;
535 // Length of the best match at previous step. Matches not greater than this
536 // are discarded. This is used in the lazy match evaluation.
537
538 unsigned strstart; // start of string to insert
539 unsigned match_start; // start of matching string
540 int eofile; // flag set at end of input file
541 unsigned lookahead; // number of valid bytes ahead in window
542
543 unsigned max_chain_length;
544 // To speed up deflation, hash chains are never searched beyond this length.
545 // A higher limit improves compression ratio but degrades the speed.
546
547 unsigned int max_lazy_match;
548 // Attempt to find a better match only when the current match is strictly
549 // smaller than this value. This mechanism is used only for compression
550 // levels >= 4.
551
552 unsigned good_match;
553 // Use a faster search when the previous match is longer than this
554
555 int nice_match; // Stop searching when current match exceeds this
556 };
557
558 typedef __int64 lutime_t; // define it ourselves since we don't include time.h
559
560 typedef struct iztimes {
561 lutime_t atime,mtime,ctime;
562 } iztimes; // access, modify, create times
563
564 typedef struct zlist {
565 ush vem, ver, flg, how; // See central header in zipfile.c for what vem..off are
566 ulg tim, crc, siz, len;
567 extent nam, ext, cext, com; // offset of ext must be >= LOCHEAD
568 ush dsk, att, lflg; // offset of lflg must be >= LOCHEAD
569 ulg atx, off;
570 char name[MAX_PATH]; // File name in zip file
571 char *extra; // Extra field (set only if ext != 0)
572 char *cextra; // Extra in central (set only if cext != 0)
573 char *comment; // Comment (set only if com != 0)
574 char iname[MAX_PATH]; // Internal file name after cleanup
575 char zname[MAX_PATH]; // External version of internal name
576 int mark; // Marker for files to operate on
577 int trash; // Marker for files to delete
578 int dosflag; // Set to force MSDOS file attributes
579 struct zlist far *nxt; // Pointer to next header in list
580 } TZipFileInfo;
581
582
583 struct TState;
584 typedef unsigned (*READFUNC)(TState &state, char *buf,unsigned size);
585 typedef unsigned (*FLUSHFUNC)(void *param, const char *buf, unsigned *size);
586 typedef unsigned (*WRITEFUNC)(void *param, const char *buf, unsigned size);
587 struct TState
588 { void *param;
589 int level; bool seekable;
590 READFUNC readfunc; FLUSHFUNC flush_outbuf;
591 TTreeState ts; TBitState bs; TDeflateState ds;
592 const char *err;
593 };
594
595
596
597
598
599
600
601
602
Assert(TState & state,bool cond,const char * msg)603 void Assert(TState &state,bool cond, const char *msg)
604 { if (cond) return;
605 state.err=msg;
606 }
Trace(const char * x,...)607 void __cdecl Trace(const char *x, ...) {va_list paramList; va_start(paramList, x); paramList; va_end(paramList);}
Tracec(bool,const char * x,...)608 void __cdecl Tracec(bool ,const char *x, ...) {va_list paramList; va_start(paramList, x); paramList; va_end(paramList);}
609
610
611
612 // ===========================================================================
613 // Local (static) routines in this file.
614 //
615
616 void init_block (TState &);
617 void pqdownheap (TState &,ct_data *tree, int k);
618 void gen_bitlen (TState &,tree_desc *desc);
619 void gen_codes (TState &state,ct_data *tree, int max_code);
620 void build_tree (TState &,tree_desc *desc);
621 void scan_tree (TState &,ct_data *tree, int max_code);
622 void send_tree (TState &state,ct_data *tree, int max_code);
623 int build_bl_tree (TState &);
624 void send_all_trees (TState &state,int lcodes, int dcodes, int blcodes);
625 void compress_block (TState &state,ct_data *ltree, ct_data *dtree);
626 void set_file_type (TState &);
627 void send_bits (TState &state, int value, int length);
628 unsigned bi_reverse (unsigned code, int len);
629 void bi_windup (TState &state);
630 void copy_block (TState &state,char *buf, unsigned len, int header);
631
632
633 #define send_code(state, c, tree) send_bits(state, tree[c].fc.code, tree[c].dl.len)
634 // Send a code of the given tree. c and tree must not have side effects
635
636 // alternatively...
637 //#define send_code(state, c, tree)
638 // { if (state.verbose>1) fprintf(stderr,"\ncd %3d ",(c));
639 // send_bits(state, tree[c].fc.code, tree[c].dl.len); }
640
641 #define d_code(dist) ((dist) < 256 ? state.ts.dist_code[dist] : state.ts.dist_code[256+((dist)>>7)])
642 // Mapping from a distance to a distance code. dist is the distance - 1 and
643 // must not have side effects. dist_code[256] and dist_code[257] are never used.
644
645 #define Max(a,b) (a >= b ? a : b)
646 /* the arguments must not have side effects */
647
648 /* ===========================================================================
649 * Allocate the match buffer, initialize the various tables and save the
650 * location of the internal file attribute (ascii/binary) and method
651 * (DEFLATE/STORE).
652 */
ct_init(TState & state,ush * attr)653 void ct_init(TState &state, ush *attr)
654 {
655 int n; /* iterates over tree elements */
656 int bits; /* bit counter */
657 int length; /* length value */
658 int code; /* code value */
659 int dist; /* distance index */
660
661 state.ts.file_type = attr;
662 //state.ts.file_method = method;
663 state.ts.cmpr_bytelen = state.ts.cmpr_len_bits = 0L;
664 state.ts.input_len = 0L;
665
666 if (state.ts.static_dtree[0].dl.len != 0) return; /* ct_init already called */
667
668 /* Initialize the mapping length (0..255) -> length code (0..28) */
669 length = 0;
670 for (code = 0; code < LENGTH_CODES-1; code++) {
671 state.ts.base_length[code] = length;
672 for (n = 0; n < (1<<extra_lbits[code]); n++) {
673 state.ts.length_code[length++] = (uch)code;
674 }
675 }
676 Assert(state,length == 256, "ct_init: length != 256");
677 /* Note that the length 255 (match length 258) can be represented
678 * in two different ways: code 284 + 5 bits or code 285, so we
679 * overwrite length_code[255] to use the best encoding:
680 */
681 state.ts.length_code[length-1] = (uch)code;
682
683 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
684 dist = 0;
685 for (code = 0 ; code < 16; code++) {
686 state.ts.base_dist[code] = dist;
687 for (n = 0; n < (1<<extra_dbits[code]); n++) {
688 state.ts.dist_code[dist++] = (uch)code;
689 }
690 }
691 Assert(state,dist == 256, "ct_init: dist != 256");
692 dist >>= 7; /* from now on, all distances are divided by 128 */
693 for ( ; code < D_CODES; code++) {
694 state.ts.base_dist[code] = dist << 7;
695 for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
696 state.ts.dist_code[256 + dist++] = (uch)code;
697 }
698 }
699 Assert(state,dist == 256, "ct_init: 256+dist != 512");
700
701 /* Construct the codes of the static literal tree */
702 for (bits = 0; bits <= MAX_BITS; bits++) state.ts.bl_count[bits] = 0;
703 n = 0;
704 while (n <= 143) state.ts.static_ltree[n++].dl.len = 8, state.ts.bl_count[8]++;
705 while (n <= 255) state.ts.static_ltree[n++].dl.len = 9, state.ts.bl_count[9]++;
706 while (n <= 279) state.ts.static_ltree[n++].dl.len = 7, state.ts.bl_count[7]++;
707 while (n <= 287) state.ts.static_ltree[n++].dl.len = 8, state.ts.bl_count[8]++;
708 /* fc.codes 286 and 287 do not exist, but we must include them in the
709 * tree construction to get a canonical Huffman tree (longest code
710 * all ones)
711 */
712 gen_codes(state,(ct_data *)state.ts.static_ltree, L_CODES+1);
713
714 /* The static distance tree is trivial: */
715 for (n = 0; n < D_CODES; n++) {
716 state.ts.static_dtree[n].dl.len = 5;
717 state.ts.static_dtree[n].fc.code = (ush)bi_reverse(n, 5);
718 }
719
720 /* Initialize the first block of the first file: */
721 init_block(state);
722 }
723
724 /* ===========================================================================
725 * Initialize a new block.
726 */
init_block(TState & state)727 void init_block(TState &state)
728 {
729 int n; /* iterates over tree elements */
730
731 /* Initialize the trees. */
732 for (n = 0; n < L_CODES; n++) state.ts.dyn_ltree[n].fc.freq = 0;
733 for (n = 0; n < D_CODES; n++) state.ts.dyn_dtree[n].fc.freq = 0;
734 for (n = 0; n < BL_CODES; n++) state.ts.bl_tree[n].fc.freq = 0;
735
736 state.ts.dyn_ltree[END_BLOCK].fc.freq = 1;
737 state.ts.opt_len = state.ts.static_len = 0L;
738 state.ts.last_lit = state.ts.last_dist = state.ts.last_flags = 0;
739 state.ts.flags = 0; state.ts.flag_bit = 1;
740 }
741
742 #define SMALLEST 1
743 /* Index within the heap array of least frequent node in the Huffman tree */
744
745
746 /* ===========================================================================
747 * Remove the smallest element from the heap and recreate the heap with
748 * one less element. Updates heap and heap_len.
749 */
750 #define pqremove(tree, top) \
751 {\
752 top = state.ts.heap[SMALLEST]; \
753 state.ts.heap[SMALLEST] = state.ts.heap[state.ts.heap_len--]; \
754 pqdownheap(state,tree, SMALLEST); \
755 }
756
757 /* ===========================================================================
758 * Compares to subtrees, using the tree depth as tie breaker when
759 * the subtrees have equal frequency. This minimizes the worst case length.
760 */
761 #define smaller(tree, n, m) \
762 (tree[n].fc.freq < tree[m].fc.freq || \
763 (tree[n].fc.freq == tree[m].fc.freq && state.ts.depth[n] <= state.ts.depth[m]))
764
765 /* ===========================================================================
766 * Restore the heap property by moving down the tree starting at node k,
767 * exchanging a node with the smallest of its two sons if necessary, stopping
768 * when the heap property is re-established (each father smaller than its
769 * two sons).
770 */
pqdownheap(TState & state,ct_data * tree,int k)771 void pqdownheap(TState &state,ct_data *tree, int k)
772 {
773 int v = state.ts.heap[k];
774 int j = k << 1; /* left son of k */
775 int htemp; /* required because of bug in SASC compiler */
776
777 while (j <= state.ts.heap_len) {
778 /* Set j to the smallest of the two sons: */
779 if (j < state.ts.heap_len && smaller(tree, state.ts.heap[j+1], state.ts.heap[j])) j++;
780
781 /* Exit if v is smaller than both sons */
782 htemp = state.ts.heap[j];
783 if (smaller(tree, v, htemp)) break;
784
785 /* Exchange v with the smallest son */
786 state.ts.heap[k] = htemp;
787 k = j;
788
789 /* And continue down the tree, setting j to the left son of k */
790 j <<= 1;
791 }
792 state.ts.heap[k] = v;
793 }
794
795 /* ===========================================================================
796 * Compute the optimal bit lengths for a tree and update the total bit length
797 * for the current block.
798 * IN assertion: the fields freq and dad are set, heap[heap_max] and
799 * above are the tree nodes sorted by increasing frequency.
800 * OUT assertions: the field len is set to the optimal bit length, the
801 * array bl_count contains the frequencies for each bit length.
802 * The length opt_len is updated; static_len is also updated if stree is
803 * not null.
804 */
gen_bitlen(TState & state,tree_desc * desc)805 void gen_bitlen(TState &state,tree_desc *desc)
806 {
807 ct_data *tree = desc->dyn_tree;
808 const int *extra = desc->extra_bits;
809 int base = desc->extra_base;
810 int max_code = desc->max_code;
811 int max_length = desc->max_length;
812 ct_data *stree = desc->static_tree;
813 int h; /* heap index */
814 int n, m; /* iterate over the tree elements */
815 int bits; /* bit length */
816 int xbits; /* extra bits */
817 ush f; /* frequency */
818 int overflow = 0; /* number of elements with bit length too large */
819
820 for (bits = 0; bits <= MAX_BITS; bits++) state.ts.bl_count[bits] = 0;
821
822 /* In a first pass, compute the optimal bit lengths (which may
823 * overflow in the case of the bit length tree).
824 */
825 tree[state.ts.heap[state.ts.heap_max]].dl.len = 0; /* root of the heap */
826
827 for (h = state.ts.heap_max+1; h < HEAP_SIZE; h++) {
828 n = state.ts.heap[h];
829 bits = tree[tree[n].dl.dad].dl.len + 1;
830 if (bits > max_length) bits = max_length, overflow++;
831 tree[n].dl.len = (ush)bits;
832 /* We overwrite tree[n].dl.dad which is no longer needed */
833
834 if (n > max_code) continue; /* not a leaf node */
835
836 state.ts.bl_count[bits]++;
837 xbits = 0;
838 if (n >= base) xbits = extra[n-base];
839 f = tree[n].fc.freq;
840 state.ts.opt_len += (ulg)f * (bits + xbits);
841 if (stree) state.ts.static_len += (ulg)f * (stree[n].dl.len + xbits);
842 }
843 if (overflow == 0) return;
844
845 Trace("\nbit length overflow\n");
846 /* This happens for example on obj2 and pic of the Calgary corpus */
847
848 /* Find the first bit length which could increase: */
849 do {
850 bits = max_length-1;
851 while (state.ts.bl_count[bits] == 0) bits--;
852 state.ts.bl_count[bits]--; /* move one leaf down the tree */
853 state.ts.bl_count[bits+1] += (ush)2; /* move one overflow item as its brother */
854 state.ts.bl_count[max_length]--;
855 /* The brother of the overflow item also moves one step up,
856 * but this does not affect bl_count[max_length]
857 */
858 overflow -= 2;
859 } while (overflow > 0);
860
861 /* Now recompute all bit lengths, scanning in increasing frequency.
862 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
863 * lengths instead of fixing only the wrong ones. This idea is taken
864 * from 'ar' written by Haruhiko Okumura.)
865 */
866 for (bits = max_length; bits != 0; bits--) {
867 n = state.ts.bl_count[bits];
868 while (n != 0) {
869 m = state.ts.heap[--h];
870 if (m > max_code) continue;
871 if (tree[m].dl.len != (ush)bits) {
872 Trace("code %d bits %d->%d\n", m, tree[m].dl.len, bits);
873 state.ts.opt_len += ((long)bits-(long)tree[m].dl.len)*(long)tree[m].fc.freq;
874 tree[m].dl.len = (ush)bits;
875 }
876 n--;
877 }
878 }
879 }
880
881 /* ===========================================================================
882 * Generate the codes for a given tree and bit counts (which need not be
883 * optimal).
884 * IN assertion: the array bl_count contains the bit length statistics for
885 * the given tree and the field len is set for all tree elements.
886 * OUT assertion: the field code is set for all tree elements of non
887 * zero code length.
888 */
gen_codes(TState & state,ct_data * tree,int max_code)889 void gen_codes (TState &state, ct_data *tree, int max_code)
890 {
891 ush next_code[MAX_BITS+1]; /* next code value for each bit length */
892 ush code = 0; /* running code value */
893 int bits; /* bit index */
894 int n; /* code index */
895
896 /* The distribution counts are first used to generate the code values
897 * without bit reversal.
898 */
899 for (bits = 1; bits <= MAX_BITS; bits++) {
900 next_code[bits] = code = (ush)((code + state.ts.bl_count[bits-1]) << 1);
901 }
902 /* Check that the bit counts in bl_count are consistent. The last code
903 * must be all ones.
904 */
905 Assert(state,code + state.ts.bl_count[MAX_BITS]-1 == (1<< ((ush) MAX_BITS)) - 1,
906 "inconsistent bit counts");
907 Trace("\ngen_codes: max_code %d ", max_code);
908
909 for (n = 0; n <= max_code; n++) {
910 int len = tree[n].dl.len;
911 if (len == 0) continue;
912 /* Now reverse the bits */
913 tree[n].fc.code = (ush)bi_reverse(next_code[len]++, len);
914
915 //Tracec(tree != state.ts.static_ltree, "\nn %3d %c l %2d c %4x (%x) ", n, (isgraph(n) ? n : ' '), len, tree[n].fc.code, next_code[len]-1);
916 }
917 }
918
919 /* ===========================================================================
920 * Construct one Huffman tree and assigns the code bit strings and lengths.
921 * Update the total bit length for the current block.
922 * IN assertion: the field freq is set for all tree elements.
923 * OUT assertions: the fields len and code are set to the optimal bit length
924 * and corresponding code. The length opt_len is updated; static_len is
925 * also updated if stree is not null. The field max_code is set.
926 */
build_tree(TState & state,tree_desc * desc)927 void build_tree(TState &state,tree_desc *desc)
928 {
929 ct_data *tree = desc->dyn_tree;
930 ct_data *stree = desc->static_tree;
931 int elems = desc->elems;
932 int n, m; /* iterate over heap elements */
933 int max_code = -1; /* largest code with non zero frequency */
934 int node = elems; /* next internal node of the tree */
935
936 /* Construct the initial heap, with least frequent element in
937 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
938 * heap[0] is not used.
939 */
940 state.ts.heap_len = 0, state.ts.heap_max = HEAP_SIZE;
941
942 for (n = 0; n < elems; n++) {
943 if (tree[n].fc.freq != 0) {
944 state.ts.heap[++state.ts.heap_len] = max_code = n;
945 state.ts.depth[n] = 0;
946 } else {
947 tree[n].dl.len = 0;
948 }
949 }
950
951 /* The pkzip format requires that at least one distance code exists,
952 * and that at least one bit should be sent even if there is only one
953 * possible code. So to avoid special checks later on we force at least
954 * two codes of non zero frequency.
955 */
956 while (state.ts.heap_len < 2) {
957 int newcp = state.ts.heap[++state.ts.heap_len] = (max_code < 2 ? ++max_code : 0);
958 tree[newcp].fc.freq = 1;
959 state.ts.depth[newcp] = 0;
960 state.ts.opt_len--; if (stree) state.ts.static_len -= stree[newcp].dl.len;
961 /* new is 0 or 1 so it does not have extra bits */
962 }
963 desc->max_code = max_code;
964
965 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
966 * establish sub-heaps of increasing lengths:
967 */
968 for (n = state.ts.heap_len/2; n >= 1; n--) pqdownheap(state,tree, n);
969
970 /* Construct the Huffman tree by repeatedly combining the least two
971 * frequent nodes.
972 */
973 do {
974 pqremove(tree, n); /* n = node of least frequency */
975 m = state.ts.heap[SMALLEST]; /* m = node of next least frequency */
976
977 state.ts.heap[--state.ts.heap_max] = n; /* keep the nodes sorted by frequency */
978 state.ts.heap[--state.ts.heap_max] = m;
979
980 /* Create a new node father of n and m */
981 tree[node].fc.freq = (ush)(tree[n].fc.freq + tree[m].fc.freq);
982 state.ts.depth[node] = (uch) (Max(state.ts.depth[n], state.ts.depth[m]) + 1);
983 tree[n].dl.dad = tree[m].dl.dad = (ush)node;
984 /* and insert the new node in the heap */
985 state.ts.heap[SMALLEST] = node++;
986 pqdownheap(state,tree, SMALLEST);
987
988 } while (state.ts.heap_len >= 2);
989
990 state.ts.heap[--state.ts.heap_max] = state.ts.heap[SMALLEST];
991
992 /* At this point, the fields freq and dad are set. We can now
993 * generate the bit lengths.
994 */
995 gen_bitlen(state,(tree_desc *)desc);
996
997 /* The field len is now set, we can generate the bit codes */
998 gen_codes (state,(ct_data *)tree, max_code);
999 }
1000
1001 /* ===========================================================================
1002 * Scan a literal or distance tree to determine the frequencies of the codes
1003 * in the bit length tree. Updates opt_len to take into account the repeat
1004 * counts. (The contribution of the bit length codes will be added later
1005 * during the construction of bl_tree.)
1006 */
scan_tree(TState & state,ct_data * tree,int max_code)1007 void scan_tree (TState &state,ct_data *tree, int max_code)
1008 {
1009 int n; /* iterates over all tree elements */
1010 int prevlen = -1; /* last emitted length */
1011 int curlen; /* length of current code */
1012 int nextlen = tree[0].dl.len; /* length of next code */
1013 int count = 0; /* repeat count of the current code */
1014 int max_count = 7; /* max repeat count */
1015 int min_count = 4; /* min repeat count */
1016
1017 if (nextlen == 0) max_count = 138, min_count = 3;
1018 tree[max_code+1].dl.len = (ush)-1; /* guard */
1019
1020 for (n = 0; n <= max_code; n++) {
1021 curlen = nextlen; nextlen = tree[n+1].dl.len;
1022 if (++count < max_count && curlen == nextlen) {
1023 continue;
1024 } else if (count < min_count) {
1025 state.ts.bl_tree[curlen].fc.freq = (ush)(state.ts.bl_tree[curlen].fc.freq + count);
1026 } else if (curlen != 0) {
1027 if (curlen != prevlen) state.ts.bl_tree[curlen].fc.freq++;
1028 state.ts.bl_tree[REP_3_6].fc.freq++;
1029 } else if (count <= 10) {
1030 state.ts.bl_tree[REPZ_3_10].fc.freq++;
1031 } else {
1032 state.ts.bl_tree[REPZ_11_138].fc.freq++;
1033 }
1034 count = 0; prevlen = curlen;
1035 if (nextlen == 0) {
1036 max_count = 138, min_count = 3;
1037 } else if (curlen == nextlen) {
1038 max_count = 6, min_count = 3;
1039 } else {
1040 max_count = 7, min_count = 4;
1041 }
1042 }
1043 }
1044
1045 /* ===========================================================================
1046 * Send a literal or distance tree in compressed form, using the codes in
1047 * bl_tree.
1048 */
send_tree(TState & state,ct_data * tree,int max_code)1049 void send_tree (TState &state, ct_data *tree, int max_code)
1050 {
1051 int n; /* iterates over all tree elements */
1052 int prevlen = -1; /* last emitted length */
1053 int curlen; /* length of current code */
1054 int nextlen = tree[0].dl.len; /* length of next code */
1055 int count = 0; /* repeat count of the current code */
1056 int max_count = 7; /* max repeat count */
1057 int min_count = 4; /* min repeat count */
1058
1059 /* tree[max_code+1].dl.len = -1; */ /* guard already set */
1060 if (nextlen == 0) max_count = 138, min_count = 3;
1061
1062 for (n = 0; n <= max_code; n++) {
1063 curlen = nextlen; nextlen = tree[n+1].dl.len;
1064 if (++count < max_count && curlen == nextlen) {
1065 continue;
1066 } else if (count < min_count) {
1067 do { send_code(state, curlen, state.ts.bl_tree); } while (--count != 0);
1068
1069 } else if (curlen != 0) {
1070 if (curlen != prevlen) {
1071 send_code(state, curlen, state.ts.bl_tree); count--;
1072 }
1073 Assert(state,count >= 3 && count <= 6, " 3_6?");
1074 send_code(state,REP_3_6, state.ts.bl_tree); send_bits(state,count-3, 2);
1075
1076 } else if (count <= 10) {
1077 send_code(state,REPZ_3_10, state.ts.bl_tree); send_bits(state,count-3, 3);
1078
1079 } else {
1080 send_code(state,REPZ_11_138, state.ts.bl_tree); send_bits(state,count-11, 7);
1081 }
1082 count = 0; prevlen = curlen;
1083 if (nextlen == 0) {
1084 max_count = 138, min_count = 3;
1085 } else if (curlen == nextlen) {
1086 max_count = 6, min_count = 3;
1087 } else {
1088 max_count = 7, min_count = 4;
1089 }
1090 }
1091 }
1092
1093 /* ===========================================================================
1094 * Construct the Huffman tree for the bit lengths and return the index in
1095 * bl_order of the last bit length code to send.
1096 */
build_bl_tree(TState & state)1097 int build_bl_tree(TState &state)
1098 {
1099 int max_blindex; /* index of last bit length code of non zero freq */
1100
1101 /* Determine the bit length frequencies for literal and distance trees */
1102 scan_tree(state,(ct_data *)state.ts.dyn_ltree, state.ts.l_desc.max_code);
1103 scan_tree(state,(ct_data *)state.ts.dyn_dtree, state.ts.d_desc.max_code);
1104
1105 /* Build the bit length tree: */
1106 build_tree(state,(tree_desc *)(&state.ts.bl_desc));
1107 /* opt_len now includes the length of the tree representations, except
1108 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
1109 */
1110
1111 /* Determine the number of bit length codes to send. The pkzip format
1112 * requires that at least 4 bit length codes be sent. (appnote.txt says
1113 * 3 but the actual value used is 4.)
1114 */
1115 for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
1116 if (state.ts.bl_tree[bl_order[max_blindex]].dl.len != 0) break;
1117 }
1118 /* Update opt_len to include the bit length tree and counts */
1119 state.ts.opt_len += 3*(max_blindex+1) + 5+5+4;
1120 Trace("\ndyn trees: dyn %ld, stat %ld", state.ts.opt_len, state.ts.static_len);
1121
1122 return max_blindex;
1123 }
1124
1125 /* ===========================================================================
1126 * Send the header for a block using dynamic Huffman trees: the counts, the
1127 * lengths of the bit length codes, the literal tree and the distance tree.
1128 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
1129 */
send_all_trees(TState & state,int lcodes,int dcodes,int blcodes)1130 void send_all_trees(TState &state,int lcodes, int dcodes, int blcodes)
1131 {
1132 int rank; /* index in bl_order */
1133
1134 Assert(state,lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
1135 Assert(state,lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
1136 "too many codes");
1137 Trace("\nbl counts: ");
1138 send_bits(state,lcodes-257, 5);
1139 /* not +255 as stated in appnote.txt 1.93a or -256 in 2.04c */
1140 send_bits(state,dcodes-1, 5);
1141 send_bits(state,blcodes-4, 4); /* not -3 as stated in appnote.txt */
1142 for (rank = 0; rank < blcodes; rank++) {
1143 Trace("\nbl code %2d ", bl_order[rank]);
1144 send_bits(state,state.ts.bl_tree[bl_order[rank]].dl.len, 3);
1145 }
1146 Trace("\nbl tree: sent %ld", state.bs.bits_sent);
1147
1148 send_tree(state,(ct_data *)state.ts.dyn_ltree, lcodes-1); /* send the literal tree */
1149 Trace("\nlit tree: sent %ld", state.bs.bits_sent);
1150
1151 send_tree(state,(ct_data *)state.ts.dyn_dtree, dcodes-1); /* send the distance tree */
1152 Trace("\ndist tree: sent %ld", state.bs.bits_sent);
1153 }
1154
1155 /* ===========================================================================
1156 * Determine the best encoding for the current block: dynamic trees, static
1157 * trees or store, and output the encoded block to the zip file. This function
1158 * returns the total compressed length (in bytes) for the file so far.
1159 */
flush_block(TState & state,char * buf,ulg stored_len,int eof)1160 ulg flush_block(TState &state,char *buf, ulg stored_len, int eof)
1161 {
1162 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
1163 int max_blindex; /* index of last bit length code of non zero freq */
1164
1165 state.ts.flag_buf[state.ts.last_flags] = state.ts.flags; /* Save the flags for the last 8 items */
1166
1167 /* Check if the file is ascii or binary */
1168 if (*state.ts.file_type == (ush)UNKNOWN) set_file_type(state);
1169
1170 /* Construct the literal and distance trees */
1171 build_tree(state,(tree_desc *)(&state.ts.l_desc));
1172 Trace("\nlit data: dyn %ld, stat %ld", state.ts.opt_len, state.ts.static_len);
1173
1174 build_tree(state,(tree_desc *)(&state.ts.d_desc));
1175 Trace("\ndist data: dyn %ld, stat %ld", state.ts.opt_len, state.ts.static_len);
1176 /* At this point, opt_len and static_len are the total bit lengths of
1177 * the compressed block data, excluding the tree representations.
1178 */
1179
1180 /* Build the bit length tree for the above two trees, and get the index
1181 * in bl_order of the last bit length code to send.
1182 */
1183 max_blindex = build_bl_tree(state);
1184
1185 /* Determine the best encoding. Compute first the block length in bytes */
1186 opt_lenb = (state.ts.opt_len+3+7)>>3;
1187 static_lenb = (state.ts.static_len+3+7)>>3;
1188 state.ts.input_len += stored_len; /* for debugging only */
1189
1190 Trace("\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ",
1191 opt_lenb, state.ts.opt_len, static_lenb, state.ts.static_len, stored_len,
1192 state.ts.last_lit, state.ts.last_dist);
1193
1194 if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
1195
1196 // Originally, zip allowed the file to be transformed from a compressed
1197 // into a stored file in the case where compression failed, there
1198 // was only one block, and it was allowed to change. I've removed this
1199 // possibility since the code's cleaner if no changes are allowed.
1200 //if (stored_len <= opt_lenb && eof && state.ts.cmpr_bytelen == 0L
1201 // && state.ts.cmpr_len_bits == 0L && state.seekable)
1202 //{ // && state.ts.file_method != NULL
1203 // // Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there:
1204 // Assert(state,buf!=NULL,"block vanished");
1205 // copy_block(state,buf, (unsigned)stored_len, 0); // without header
1206 // state.ts.cmpr_bytelen = stored_len;
1207 // Assert(state,false,"unimplemented *state.ts.file_method = STORE;");
1208 // //*state.ts.file_method = STORE;
1209 //}
1210 //else
1211 if (stored_len+4 <= opt_lenb && buf != (char*)NULL) {
1212 /* 4: two words for the lengths */
1213 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
1214 * Otherwise we can't have processed more than WSIZE input bytes since
1215 * the last block flush, because compression would have been
1216 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
1217 * transform a block into a stored block.
1218 */
1219 send_bits(state,(STORED_BLOCK<<1)+eof, 3); /* send block type */
1220 state.ts.cmpr_bytelen += ((state.ts.cmpr_len_bits + 3 + 7) >> 3) + stored_len + 4;
1221 state.ts.cmpr_len_bits = 0L;
1222
1223 copy_block(state,buf, (unsigned)stored_len, 1); /* with header */
1224 }
1225 else if (static_lenb == opt_lenb) {
1226 send_bits(state,(STATIC_TREES<<1)+eof, 3);
1227 compress_block(state,(ct_data *)state.ts.static_ltree, (ct_data *)state.ts.static_dtree);
1228 state.ts.cmpr_len_bits += 3 + state.ts.static_len;
1229 state.ts.cmpr_bytelen += state.ts.cmpr_len_bits >> 3;
1230 state.ts.cmpr_len_bits &= 7L;
1231 }
1232 else {
1233 send_bits(state,(DYN_TREES<<1)+eof, 3);
1234 send_all_trees(state,state.ts.l_desc.max_code+1, state.ts.d_desc.max_code+1, max_blindex+1);
1235 compress_block(state,(ct_data *)state.ts.dyn_ltree, (ct_data *)state.ts.dyn_dtree);
1236 state.ts.cmpr_len_bits += 3 + state.ts.opt_len;
1237 state.ts.cmpr_bytelen += state.ts.cmpr_len_bits >> 3;
1238 state.ts.cmpr_len_bits &= 7L;
1239 }
1240 Assert(state,((state.ts.cmpr_bytelen << 3) + state.ts.cmpr_len_bits) == state.bs.bits_sent, "bad compressed size");
1241 init_block(state);
1242
1243 if (eof) {
1244 // Assert(state,input_len == isize, "bad input size");
1245 bi_windup(state);
1246 state.ts.cmpr_len_bits += 7; /* align on byte boundary */
1247 }
1248 Trace("\n");
1249
1250 return state.ts.cmpr_bytelen + (state.ts.cmpr_len_bits >> 3);
1251 }
1252
1253 /* ===========================================================================
1254 * Save the match info and tally the frequency counts. Return true if
1255 * the current block must be flushed.
1256 */
ct_tally(TState & state,int dist,int lc)1257 int ct_tally (TState &state,int dist, int lc)
1258 {
1259 state.ts.l_buf[state.ts.last_lit++] = (uch)lc;
1260 if (dist == 0) {
1261 /* lc is the unmatched char */
1262 state.ts.dyn_ltree[lc].fc.freq++;
1263 } else {
1264 /* Here, lc is the match length - MIN_MATCH */
1265 dist--; /* dist = match distance - 1 */
1266 Assert(state,(ush)dist < (ush)MAX_DIST &&
1267 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1268 (ush)d_code(dist) < (ush)D_CODES, "ct_tally: bad match");
1269
1270 state.ts.dyn_ltree[state.ts.length_code[lc]+LITERALS+1].fc.freq++;
1271 state.ts.dyn_dtree[d_code(dist)].fc.freq++;
1272
1273 state.ts.d_buf[state.ts.last_dist++] = (ush)dist;
1274 state.ts.flags |= state.ts.flag_bit;
1275 }
1276 state.ts.flag_bit <<= 1;
1277
1278 /* Output the flags if they fill a byte: */
1279 if ((state.ts.last_lit & 7) == 0) {
1280 state.ts.flag_buf[state.ts.last_flags++] = state.ts.flags;
1281 state.ts.flags = 0, state.ts.flag_bit = 1;
1282 }
1283 /* Try to guess if it is profitable to stop the current block here */
1284 if (state.level > 2 && (state.ts.last_lit & 0xfff) == 0) {
1285 /* Compute an upper bound for the compressed length */
1286 ulg out_length = (ulg)state.ts.last_lit*8L;
1287 ulg in_length = (ulg)state.ds.strstart-state.ds.block_start;
1288 int dcode;
1289 for (dcode = 0; dcode < D_CODES; dcode++) {
1290 out_length += (ulg)state.ts.dyn_dtree[dcode].fc.freq*(5L+extra_dbits[dcode]);
1291 }
1292 out_length >>= 3;
1293 Trace("\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ",
1294 state.ts.last_lit, state.ts.last_dist, in_length, out_length,
1295 100L - out_length*100L/in_length);
1296 if (state.ts.last_dist < state.ts.last_lit/2 && out_length < in_length/2) return 1;
1297 }
1298 return (state.ts.last_lit == LIT_BUFSIZE-1 || state.ts.last_dist == DIST_BUFSIZE);
1299 /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K
1300 * on 16 bit machines and because stored blocks are restricted to
1301 * 64K-1 bytes.
1302 */
1303 }
1304
1305 /* ===========================================================================
1306 * Send the block data compressed using the given Huffman trees
1307 */
compress_block(TState & state,ct_data * ltree,ct_data * dtree)1308 void compress_block(TState &state,ct_data *ltree, ct_data *dtree)
1309 {
1310 unsigned dist; /* distance of matched string */
1311 int lc; /* match length or unmatched char (if dist == 0) */
1312 unsigned lx = 0; /* running index in l_buf */
1313 unsigned dx = 0; /* running index in d_buf */
1314 unsigned fx = 0; /* running index in flag_buf */
1315 uch flag = 0; /* current flags */
1316 unsigned code; /* the code to send */
1317 int extra; /* number of extra bits to send */
1318
1319 if (state.ts.last_lit != 0) do {
1320 if ((lx & 7) == 0) flag = state.ts.flag_buf[fx++];
1321 lc = state.ts.l_buf[lx++];
1322 if ((flag & 1) == 0) {
1323 send_code(state,lc, ltree); /* send a literal byte */
1324 } else {
1325 /* Here, lc is the match length - MIN_MATCH */
1326 code = state.ts.length_code[lc];
1327 send_code(state,code+LITERALS+1, ltree); /* send the length code */
1328 extra = extra_lbits[code];
1329 if (extra != 0) {
1330 lc -= state.ts.base_length[code];
1331 send_bits(state,lc, extra); /* send the extra length bits */
1332 }
1333 dist = state.ts.d_buf[dx++];
1334 /* Here, dist is the match distance - 1 */
1335 code = d_code(dist);
1336 Assert(state,code < D_CODES, "bad d_code");
1337
1338 send_code(state,code, dtree); /* send the distance code */
1339 extra = extra_dbits[code];
1340 if (extra != 0) {
1341 dist -= state.ts.base_dist[code];
1342 send_bits(state,dist, extra); /* send the extra distance bits */
1343 }
1344 } /* literal or match pair ? */
1345 flag >>= 1;
1346 } while (lx < state.ts.last_lit);
1347
1348 send_code(state,END_BLOCK, ltree);
1349 }
1350
1351 /* ===========================================================================
1352 * Set the file type to ASCII or BINARY, using a crude approximation:
1353 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
1354 * IN assertion: the fields freq of dyn_ltree are set and the total of all
1355 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
1356 */
set_file_type(TState & state)1357 void set_file_type(TState &state)
1358 {
1359 int n = 0;
1360 unsigned ascii_freq = 0;
1361 unsigned bin_freq = 0;
1362 while (n < 7) bin_freq += state.ts.dyn_ltree[n++].fc.freq;
1363 while (n < 128) ascii_freq += state.ts.dyn_ltree[n++].fc.freq;
1364 while (n < LITERALS) bin_freq += state.ts.dyn_ltree[n++].fc.freq;
1365 *state.ts.file_type = (ush)(bin_freq > (ascii_freq >> 2) ? BINARY : ASCII);
1366 }
1367
1368
1369 /* ===========================================================================
1370 * Initialize the bit string routines.
1371 */
bi_init(TState & state,char * tgt_buf,unsigned tgt_size,int flsh_allowed)1372 void bi_init (TState &state,char *tgt_buf, unsigned tgt_size, int flsh_allowed)
1373 {
1374 state.bs.out_buf = tgt_buf;
1375 state.bs.out_size = tgt_size;
1376 state.bs.out_offset = 0;
1377 state.bs.flush_flg = flsh_allowed;
1378
1379 state.bs.bi_buf = 0;
1380 state.bs.bi_valid = 0;
1381 state.bs.bits_sent = 0L;
1382 }
1383
1384 /* ===========================================================================
1385 * Send a value on a given number of bits.
1386 * IN assertion: length <= 16 and value fits in length bits.
1387 */
send_bits(TState & state,int value,int length)1388 void send_bits(TState &state,int value, int length)
1389 {
1390 Assert(state,length > 0 && length <= 15, "invalid length");
1391 state.bs.bits_sent += (ulg)length;
1392 /* If not enough room in bi_buf, use (bi_valid) bits from bi_buf and
1393 * (Buf_size - bi_valid) bits from value to flush the filled bi_buf,
1394 * then fill in the rest of (value), leaving (length - (Buf_size-bi_valid))
1395 * unused bits in bi_buf.
1396 */
1397 state.bs.bi_buf |= (value << state.bs.bi_valid);
1398 state.bs.bi_valid += length;
1399 if (state.bs.bi_valid > (int)Buf_size) {
1400 PUTSHORT(state,state.bs.bi_buf);
1401 state.bs.bi_valid -= Buf_size;
1402 state.bs.bi_buf = (unsigned)value >> (length - state.bs.bi_valid);
1403 }
1404 }
1405
1406 /* ===========================================================================
1407 * Reverse the first len bits of a code, using straightforward code (a faster
1408 * method would use a table)
1409 * IN assertion: 1 <= len <= 15
1410 */
bi_reverse(unsigned code,int len)1411 unsigned bi_reverse(unsigned code, int len)
1412 {
1413 register unsigned res = 0;
1414 do {
1415 res |= code & 1;
1416 code >>= 1, res <<= 1;
1417 } while (--len > 0);
1418 return res >> 1;
1419 }
1420
1421 /* ===========================================================================
1422 * Write out any remaining bits in an incomplete byte.
1423 */
bi_windup(TState & state)1424 void bi_windup(TState &state)
1425 {
1426 if (state.bs.bi_valid > 8) {
1427 PUTSHORT(state,state.bs.bi_buf);
1428 } else if (state.bs.bi_valid > 0) {
1429 PUTBYTE(state,state.bs.bi_buf);
1430 }
1431 if (state.bs.flush_flg) {
1432 state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset);
1433 }
1434 state.bs.bi_buf = 0;
1435 state.bs.bi_valid = 0;
1436 state.bs.bits_sent = (state.bs.bits_sent+7) & ~7;
1437 }
1438
1439 /* ===========================================================================
1440 * Copy a stored block to the zip file, storing first the length and its
1441 * one's complement if requested.
1442 */
copy_block(TState & state,char * block,unsigned len,int header)1443 void copy_block(TState &state, char *block, unsigned len, int header)
1444 {
1445 bi_windup(state); /* align on byte boundary */
1446
1447 if (header) {
1448 PUTSHORT(state,(ush)len);
1449 PUTSHORT(state,(ush)~len);
1450 state.bs.bits_sent += 2*16;
1451 }
1452 if (state.bs.flush_flg) {
1453 state.flush_outbuf(state.param,state.bs.out_buf, &state.bs.out_offset);
1454 state.bs.out_offset = len;
1455 state.flush_outbuf(state.param,block, &state.bs.out_offset);
1456 } else if (state.bs.out_offset + len > state.bs.out_size) {
1457 Assert(state,false,"output buffer too small for in-memory compression");
1458 } else {
1459 memcpy(state.bs.out_buf + state.bs.out_offset, block, len);
1460 state.bs.out_offset += len;
1461 }
1462 state.bs.bits_sent += (ulg)len<<3;
1463 }
1464
1465
1466
1467
1468
1469
1470
1471
1472 /* ===========================================================================
1473 * Prototypes for functions.
1474 */
1475
1476 void fill_window (TState &state);
1477 ulg deflate_fast (TState &state);
1478
1479 int longest_match (TState &state,IPos cur_match);
1480
1481
1482 /* ===========================================================================
1483 * Update a hash value with the given input byte
1484 * IN assertion: all calls to to UPDATE_HASH are made with consecutive
1485 * input characters, so that a running hash key can be computed from the
1486 * previous key instead of complete recalculation each time.
1487 */
1488 #define UPDATE_HASH(h,c) (h = (((h)<<H_SHIFT) ^ (c)) & HASH_MASK)
1489
1490 /* ===========================================================================
1491 * Insert string s in the dictionary and set match_head to the previous head
1492 * of the hash chain (the most recent string with same hash key). Return
1493 * the previous length of the hash chain.
1494 * IN assertion: all calls to to INSERT_STRING are made with consecutive
1495 * input characters and the first MIN_MATCH bytes of s are valid
1496 * (except for the last MIN_MATCH-1 bytes of the input file).
1497 */
1498 #define INSERT_STRING(s, match_head) \
1499 (UPDATE_HASH(state.ds.ins_h, state.ds.window[(s) + (MIN_MATCH-1)]), \
1500 state.ds.prev[(s) & WMASK] = match_head = state.ds.head[state.ds.ins_h], \
1501 state.ds.head[state.ds.ins_h] = (s))
1502
1503 /* ===========================================================================
1504 * Initialize the "longest match" routines for a new file
1505 *
1506 * IN assertion: window_size is > 0 if the input file is already read or
1507 * mmap'ed in the window[] array, 0 otherwise. In the first case,
1508 * window_size is sufficient to contain the whole input file plus
1509 * MIN_LOOKAHEAD bytes (to avoid referencing memory beyond the end
1510 * of window[] when looking for matches towards the end).
1511 */
lm_init(TState & state,int pack_level,ush * flags)1512 void lm_init (TState &state, int pack_level, ush *flags)
1513 {
1514 register unsigned j;
1515
1516 Assert(state,pack_level>=1 && pack_level<=8,"bad pack level");
1517
1518 /* Do not slide the window if the whole input is already in memory
1519 * (window_size > 0)
1520 */
1521 state.ds.sliding = 0;
1522 if (state.ds.window_size == 0L) {
1523 state.ds.sliding = 1;
1524 state.ds.window_size = (ulg)2L*WSIZE;
1525 }
1526
1527 /* Initialize the hash table (avoiding 64K overflow for 16 bit systems).
1528 * prev[] will be initialized on the fly.
1529 */
1530 state.ds.head[HASH_SIZE-1] = NIL;
1531 memset((char*)state.ds.head, NIL, (unsigned)(HASH_SIZE-1)*sizeof(*state.ds.head));
1532
1533 /* Set the default configuration parameters:
1534 */
1535 state.ds.max_lazy_match = configuration_table[pack_level].max_lazy;
1536 state.ds.good_match = configuration_table[pack_level].good_length;
1537 state.ds.nice_match = configuration_table[pack_level].nice_length;
1538 state.ds.max_chain_length = configuration_table[pack_level].max_chain;
1539 if (pack_level <= 2) {
1540 *flags |= FAST;
1541 } else if (pack_level >= 8) {
1542 *flags |= SLOW;
1543 }
1544 /* ??? reduce max_chain_length for binary files */
1545
1546 state.ds.strstart = 0;
1547 state.ds.block_start = 0L;
1548
1549 j = WSIZE;
1550 j <<= 1; // Can read 64K in one step
1551 state.ds.lookahead = state.readfunc(state, (char*)state.ds.window, j);
1552
1553 if (state.ds.lookahead == 0 || state.ds.lookahead == (unsigned)EOF) {
1554 state.ds.eofile = 1, state.ds.lookahead = 0;
1555 return;
1556 }
1557 state.ds.eofile = 0;
1558 /* Make sure that we always have enough lookahead. This is important
1559 * if input comes from a device such as a tty.
1560 */
1561 if (state.ds.lookahead < MIN_LOOKAHEAD) fill_window(state);
1562
1563 state.ds.ins_h = 0;
1564 for (j=0; j<MIN_MATCH-1; j++) UPDATE_HASH(state.ds.ins_h, state.ds.window[j]);
1565 /* If lookahead < MIN_MATCH, ins_h is garbage, but this is
1566 * not important since only literal bytes will be emitted.
1567 */
1568 }
1569
1570
1571 /* ===========================================================================
1572 * Set match_start to the longest match starting at the given string and
1573 * return its length. Matches shorter or equal to prev_length are discarded,
1574 * in which case the result is equal to prev_length and match_start is
1575 * garbage.
1576 * IN assertions: cur_match is the head of the hash chain for the current
1577 * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
1578 */
1579 // For 80x86 and 680x0 and ARM, an optimized version is in match.asm or
1580 // match.S. The code is functionally equivalent, so you can use the C version
1581 // if desired. Which I do so desire!
longest_match(TState & state,IPos cur_match)1582 int longest_match(TState &state,IPos cur_match)
1583 {
1584 unsigned chain_length = state.ds.max_chain_length; /* max hash chain length */
1585 register uch far *scan = state.ds.window + state.ds.strstart; /* current string */
1586 register uch far *match; /* matched string */
1587 register int len; /* length of current match */
1588 int best_len = state.ds.prev_length; /* best match length so far */
1589 IPos limit = state.ds.strstart > (IPos)MAX_DIST ? state.ds.strstart - (IPos)MAX_DIST : NIL;
1590 /* Stop when cur_match becomes <= limit. To simplify the code,
1591 * we prevent matches with the string of window index 0.
1592 */
1593
1594 // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
1595 // It is easy to get rid of this optimization if necessary.
1596 Assert(state,HASH_BITS>=8 && MAX_MATCH==258,"Code too clever");
1597
1598
1599
1600 register uch far *strend = state.ds.window + state.ds.strstart + MAX_MATCH;
1601 register uch scan_end1 = scan[best_len-1];
1602 register uch scan_end = scan[best_len];
1603
1604 /* Do not waste too much time if we already have a good match: */
1605 if (state.ds.prev_length >= state.ds.good_match) {
1606 chain_length >>= 2;
1607 }
1608
1609 Assert(state,state.ds.strstart <= state.ds.window_size-MIN_LOOKAHEAD, "insufficient lookahead");
1610
1611 do {
1612 Assert(state,cur_match < state.ds.strstart, "no future");
1613 match = state.ds.window + cur_match;
1614
1615 /* Skip to next match if the match length cannot increase
1616 * or if the match length is less than 2:
1617 */
1618 if (match[best_len] != scan_end ||
1619 match[best_len-1] != scan_end1 ||
1620 *match != *scan ||
1621 *++match != scan[1]) continue;
1622
1623 /* The check at best_len-1 can be removed because it will be made
1624 * again later. (This heuristic is not always a win.)
1625 * It is not necessary to compare scan[2] and match[2] since they
1626 * are always equal when the other bytes match, given that
1627 * the hash keys are equal and that HASH_BITS >= 8.
1628 */
1629 scan += 2, match++;
1630
1631 /* We check for insufficient lookahead only every 8th comparison;
1632 * the 256th check will be made at strstart+258.
1633 */
1634 do {
1635 } while (*++scan == *++match && *++scan == *++match &&
1636 *++scan == *++match && *++scan == *++match &&
1637 *++scan == *++match && *++scan == *++match &&
1638 *++scan == *++match && *++scan == *++match &&
1639 scan < strend);
1640
1641 Assert(state,scan <= state.ds.window+(unsigned)(state.ds.window_size-1), "wild scan");
1642
1643 len = MAX_MATCH - (int)(strend - scan);
1644 scan = strend - MAX_MATCH;
1645
1646
1647 if (len > best_len) {
1648 state.ds.match_start = cur_match;
1649 best_len = len;
1650 if (len >= state.ds.nice_match) break;
1651 scan_end1 = scan[best_len-1];
1652 scan_end = scan[best_len];
1653 }
1654 } while ((cur_match = state.ds.prev[cur_match & WMASK]) > limit
1655 && --chain_length != 0);
1656
1657 return best_len;
1658 }
1659
1660
1661
1662 #define check_match(state,start, match, length)
1663 // or alternatively...
1664 //void check_match(TState &state,IPos start, IPos match, int length)
1665 //{ // check that the match is indeed a match
1666 // if (memcmp((char*)state.ds.window + match,
1667 // (char*)state.ds.window + start, length) != EQUAL) {
1668 // fprintf(stderr,
1669 // " start %d, match %d, length %d\n",
1670 // start, match, length);
1671 // error("invalid match");
1672 // }
1673 // if (state.verbose > 1) {
1674 // fprintf(stderr,"\\[%d,%d]", start-match, length);
1675 // do { fprintf(stdout,"%c",state.ds.window[start++]); } while (--length != 0);
1676 // }
1677 //}
1678
1679 /* ===========================================================================
1680 * Fill the window when the lookahead becomes insufficient.
1681 * Updates strstart and lookahead, and sets eofile if end of input file.
1682 *
1683 * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0
1684 * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
1685 * At least one byte has been read, or eofile is set; file reads are
1686 * performed for at least two bytes (required for the translate_eol option).
1687 */
fill_window(TState & state)1688 void fill_window(TState &state)
1689 {
1690 register unsigned n, m;
1691 unsigned more; /* Amount of free space at the end of the window. */
1692
1693 do {
1694 more = (unsigned)(state.ds.window_size - (ulg)state.ds.lookahead - (ulg)state.ds.strstart);
1695
1696 /* If the window is almost full and there is insufficient lookahead,
1697 * move the upper half to the lower one to make room in the upper half.
1698 */
1699 if (more == (unsigned)EOF) {
1700 /* Very unlikely, but possible on 16 bit machine if strstart == 0
1701 * and lookahead == 1 (input done one byte at time)
1702 */
1703 more--;
1704
1705 /* For MMAP or BIG_MEM, the whole input file is already in memory so
1706 * we must not perform sliding. We must however call (*read_buf)() in
1707 * order to compute the crc, update lookahead and possibly set eofile.
1708 */
1709 } else if (state.ds.strstart >= WSIZE+MAX_DIST && state.ds.sliding) {
1710
1711 /* By the IN assertion, the window is not empty so we can't confuse
1712 * more == 0 with more == 64K on a 16 bit machine.
1713 */
1714 memcpy((char*)state.ds.window, (char*)state.ds.window+WSIZE, (unsigned)WSIZE);
1715 state.ds.match_start -= WSIZE;
1716 state.ds.strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
1717
1718 state.ds.block_start -= (long) WSIZE;
1719
1720 for (n = 0; n < HASH_SIZE; n++) {
1721 m = state.ds.head[n];
1722 state.ds.head[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
1723 }
1724 for (n = 0; n < WSIZE; n++) {
1725 m = state.ds.prev[n];
1726 state.ds.prev[n] = (Pos)(m >= WSIZE ? m-WSIZE : NIL);
1727 /* If n is not on any hash chain, prev[n] is garbage but
1728 * its value will never be used.
1729 */
1730 }
1731 more += WSIZE;
1732 }
1733 if (state.ds.eofile) return;
1734
1735 /* If there was no sliding:
1736 * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
1737 * more == window_size - lookahead - strstart
1738 * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
1739 * => more >= window_size - 2*WSIZE + 2
1740 * In the MMAP or BIG_MEM case (not yet supported in gzip),
1741 * window_size == input_size + MIN_LOOKAHEAD &&
1742 * strstart + lookahead <= input_size => more >= MIN_LOOKAHEAD.
1743 * Otherwise, window_size == 2*WSIZE so more >= 2.
1744 * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
1745 */
1746 Assert(state,more >= 2, "more < 2");
1747
1748 n = state.readfunc(state, (char*)state.ds.window+state.ds.strstart+state.ds.lookahead, more);
1749
1750 if (n == 0 || n == (unsigned)EOF) {
1751 state.ds.eofile = 1;
1752 } else {
1753 state.ds.lookahead += n;
1754 }
1755 } while (state.ds.lookahead < MIN_LOOKAHEAD && !state.ds.eofile);
1756 }
1757
1758 /* ===========================================================================
1759 * Flush the current block, with given end-of-file flag.
1760 * IN assertion: strstart is set to the end of the current match.
1761 */
1762 #define FLUSH_BLOCK(state,eof) \
1763 flush_block(state,state.ds.block_start >= 0L ? (char*)&state.ds.window[(unsigned)state.ds.block_start] : \
1764 (char*)NULL, (long)state.ds.strstart - state.ds.block_start, (eof))
1765
1766 /* ===========================================================================
1767 * Processes a new input file and return its compressed length. This
1768 * function does not perform lazy evaluation of matches and inserts
1769 * new strings in the dictionary only for unmatched strings or for short
1770 * matches. It is used only for the fast compression options.
1771 */
deflate_fast(TState & state)1772 ulg deflate_fast(TState &state)
1773 {
1774 IPos hash_head = NIL; /* head of the hash chain */
1775 int flush; /* set if current block must be flushed */
1776 unsigned match_length = 0; /* length of best match */
1777
1778 state.ds.prev_length = MIN_MATCH-1;
1779 while (state.ds.lookahead != 0) {
1780 /* Insert the string window[strstart .. strstart+2] in the
1781 * dictionary, and set hash_head to the head of the hash chain:
1782 */
1783 if (state.ds.lookahead >= MIN_MATCH)
1784 INSERT_STRING(state.ds.strstart, hash_head);
1785
1786 /* Find the longest match, discarding those <= prev_length.
1787 * At this point we have always match_length < MIN_MATCH
1788 */
1789 if (hash_head != NIL && state.ds.strstart - hash_head <= MAX_DIST) {
1790 /* To simplify the code, we prevent matches with the string
1791 * of window index 0 (in particular we have to avoid a match
1792 * of the string with itself at the start of the input file).
1793 */
1794 /* Do not look for matches beyond the end of the input.
1795 * This is necessary to make deflate deterministic.
1796 */
1797 if ((unsigned)state.ds.nice_match > state.ds.lookahead) state.ds.nice_match = (int)state.ds.lookahead;
1798 match_length = longest_match (state,hash_head);
1799 /* longest_match() sets match_start */
1800 if (match_length > state.ds.lookahead) match_length = state.ds.lookahead;
1801 }
1802 if (match_length >= MIN_MATCH) {
1803 check_match(state,state.ds.strstart, state.ds.match_start, match_length);
1804
1805 flush = ct_tally(state,state.ds.strstart-state.ds.match_start, match_length - MIN_MATCH);
1806
1807 state.ds.lookahead -= match_length;
1808
1809 /* Insert new strings in the hash table only if the match length
1810 * is not too large. This saves time but degrades compression.
1811 */
1812 if (match_length <= state.ds.max_insert_length
1813 && state.ds.lookahead >= MIN_MATCH) {
1814 match_length--; /* string at strstart already in hash table */
1815 do {
1816 state.ds.strstart++;
1817 INSERT_STRING(state.ds.strstart, hash_head);
1818 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
1819 * always MIN_MATCH bytes ahead.
1820 */
1821 } while (--match_length != 0);
1822 state.ds.strstart++;
1823 } else {
1824 state.ds.strstart += match_length;
1825 match_length = 0;
1826 state.ds.ins_h = state.ds.window[state.ds.strstart];
1827 UPDATE_HASH(state.ds.ins_h, state.ds.window[state.ds.strstart+1]);
1828 Assert(state,MIN_MATCH==3,"Call UPDATE_HASH() MIN_MATCH-3 more times");
1829 }
1830 } else {
1831 /* No match, output a literal byte */
1832 flush = ct_tally (state,0, state.ds.window[state.ds.strstart]);
1833 state.ds.lookahead--;
1834 state.ds.strstart++;
1835 }
1836 if (flush) FLUSH_BLOCK(state,0), state.ds.block_start = state.ds.strstart;
1837
1838 /* Make sure that we always have enough lookahead, except
1839 * at the end of the input file. We need MAX_MATCH bytes
1840 * for the next match, plus MIN_MATCH bytes to insert the
1841 * string following the next match.
1842 */
1843 if (state.ds.lookahead < MIN_LOOKAHEAD) fill_window(state);
1844 }
1845 return FLUSH_BLOCK(state,1); /* eof */
1846 }
1847
1848 /* ===========================================================================
1849 * Same as above, but achieves better compression. We use a lazy
1850 * evaluation for matches: a match is finally adopted only if there is
1851 * no better match at the next window position.
1852 */
deflate(TState & state)1853 ulg deflate(TState &state)
1854 {
1855 IPos hash_head = NIL; /* head of hash chain */
1856 IPos prev_match; /* previous match */
1857 int flush; /* set if current block must be flushed */
1858 int match_available = 0; /* set if previous match exists */
1859 register unsigned match_length = MIN_MATCH-1; /* length of best match */
1860
1861 if (state.level <= 3) return deflate_fast(state); /* optimized for speed */
1862
1863 /* Process the input block. */
1864 while (state.ds.lookahead != 0) {
1865 /* Insert the string window[strstart .. strstart+2] in the
1866 * dictionary, and set hash_head to the head of the hash chain:
1867 */
1868 if (state.ds.lookahead >= MIN_MATCH)
1869 INSERT_STRING(state.ds.strstart, hash_head);
1870
1871 /* Find the longest match, discarding those <= prev_length.
1872 */
1873 state.ds.prev_length = match_length, prev_match = state.ds.match_start;
1874 match_length = MIN_MATCH-1;
1875
1876 if (hash_head != NIL && state.ds.prev_length < state.ds.max_lazy_match &&
1877 state.ds.strstart - hash_head <= MAX_DIST) {
1878 /* To simplify the code, we prevent matches with the string
1879 * of window index 0 (in particular we have to avoid a match
1880 * of the string with itself at the start of the input file).
1881 */
1882 /* Do not look for matches beyond the end of the input.
1883 * This is necessary to make deflate deterministic.
1884 */
1885 if ((unsigned)state.ds.nice_match > state.ds.lookahead) state.ds.nice_match = (int)state.ds.lookahead;
1886 match_length = longest_match (state,hash_head);
1887 /* longest_match() sets match_start */
1888 if (match_length > state.ds.lookahead) match_length = state.ds.lookahead;
1889
1890 /* Ignore a length 3 match if it is too distant: */
1891 if (match_length == MIN_MATCH && state.ds.strstart-state.ds.match_start > TOO_FAR){
1892 /* If prev_match is also MIN_MATCH, match_start is garbage
1893 * but we will ignore the current match anyway.
1894 */
1895 match_length = MIN_MATCH-1;
1896 }
1897 }
1898 /* If there was a match at the previous step and the current
1899 * match is not better, output the previous match:
1900 */
1901 if (state.ds.prev_length >= MIN_MATCH && match_length <= state.ds.prev_length) {
1902 unsigned max_insert = state.ds.strstart + state.ds.lookahead - MIN_MATCH;
1903 check_match(state,state.ds.strstart-1, prev_match, state.ds.prev_length);
1904 flush = ct_tally(state,state.ds.strstart-1-prev_match, state.ds.prev_length - MIN_MATCH);
1905
1906 /* Insert in hash table all strings up to the end of the match.
1907 * strstart-1 and strstart are already inserted.
1908 */
1909 state.ds.lookahead -= state.ds.prev_length-1;
1910 state.ds.prev_length -= 2;
1911 do {
1912 if (++state.ds.strstart <= max_insert) {
1913 INSERT_STRING(state.ds.strstart, hash_head);
1914 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
1915 * always MIN_MATCH bytes ahead.
1916 */
1917 }
1918 } while (--state.ds.prev_length != 0);
1919 state.ds.strstart++;
1920 match_available = 0;
1921 match_length = MIN_MATCH-1;
1922
1923 if (flush) FLUSH_BLOCK(state,0), state.ds.block_start = state.ds.strstart;
1924
1925 } else if (match_available) {
1926 /* If there was no match at the previous position, output a
1927 * single literal. If there was a match but the current match
1928 * is longer, truncate the previous match to a single literal.
1929 */
1930 if (ct_tally (state,0, state.ds.window[state.ds.strstart-1])) {
1931 FLUSH_BLOCK(state,0), state.ds.block_start = state.ds.strstart;
1932 }
1933 state.ds.strstart++;
1934 state.ds.lookahead--;
1935 } else {
1936 /* There is no previous match to compare with, wait for
1937 * the next step to decide.
1938 */
1939 match_available = 1;
1940 state.ds.strstart++;
1941 state.ds.lookahead--;
1942 }
1943 // Assert(state,strstart <= isize && lookahead <= isize, "a bit too far");
1944
1945 /* Make sure that we always have enough lookahead, except
1946 * at the end of the input file. We need MAX_MATCH bytes
1947 * for the next match, plus MIN_MATCH bytes to insert the
1948 * string following the next match.
1949 */
1950 if (state.ds.lookahead < MIN_LOOKAHEAD) fill_window(state);
1951 }
1952 if (match_available) ct_tally (state,0, state.ds.window[state.ds.strstart-1]);
1953
1954 return FLUSH_BLOCK(state,1); /* eof */
1955 }
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
putlocal(struct zlist far * z,WRITEFUNC wfunc,void * param)1968 int putlocal(struct zlist far *z, WRITEFUNC wfunc,void *param)
1969 { // Write a local header described by *z to file *f. Return a ZE_ error code.
1970 PUTLG(LOCSIG, f);
1971 PUTSH(z->ver, f);
1972 PUTSH(z->lflg, f);
1973 PUTSH(z->how, f);
1974 PUTLG(z->tim, f);
1975 PUTLG(z->crc, f);
1976 PUTLG(z->siz, f);
1977 PUTLG(z->len, f);
1978 PUTSH(z->nam, f);
1979 PUTSH(z->ext, f);
1980 size_t res = (size_t)wfunc(param, z->iname, (unsigned int)z->nam);
1981 if (res!=z->nam) return ZE_TEMP;
1982 if (z->ext)
1983 { res = (size_t)wfunc(param, z->extra, (unsigned int)z->ext);
1984 if (res!=z->ext) return ZE_TEMP;
1985 }
1986 return ZE_OK;
1987 }
1988
putextended(struct zlist far * z,WRITEFUNC wfunc,void * param)1989 int putextended(struct zlist far *z, WRITEFUNC wfunc, void *param)
1990 { // Write an extended local header described by *z to file *f. Returns a ZE_ code
1991 PUTLG(EXTLOCSIG, f);
1992 PUTLG(z->crc, f);
1993 PUTLG(z->siz, f);
1994 PUTLG(z->len, f);
1995 return ZE_OK;
1996 }
1997
putcentral(struct zlist far * z,WRITEFUNC wfunc,void * param)1998 int putcentral(struct zlist far *z, WRITEFUNC wfunc, void *param)
1999 { // Write a central header entry of *z to file *f. Returns a ZE_ code.
2000 PUTLG(CENSIG, f);
2001 PUTSH(z->vem, f);
2002 PUTSH(z->ver, f);
2003 PUTSH(z->flg, f);
2004 PUTSH(z->how, f);
2005 PUTLG(z->tim, f);
2006 PUTLG(z->crc, f);
2007 PUTLG(z->siz, f);
2008 PUTLG(z->len, f);
2009 PUTSH(z->nam, f);
2010 PUTSH(z->cext, f);
2011 PUTSH(z->com, f);
2012 PUTSH(z->dsk, f);
2013 PUTSH(z->att, f);
2014 PUTLG(z->atx, f);
2015 PUTLG(z->off, f);
2016 if ((size_t)wfunc(param, z->iname, (unsigned int)z->nam) != z->nam ||
2017 (z->cext && (size_t)wfunc(param, z->cextra, (unsigned int)z->cext) != z->cext) ||
2018 (z->com && (size_t)wfunc(param, z->comment, (unsigned int)z->com) != z->com))
2019 return ZE_TEMP;
2020 return ZE_OK;
2021 }
2022
2023
putend(int n,ulg s,ulg c,extent m,char * z,WRITEFUNC wfunc,void * param)2024 int putend(int n, ulg s, ulg c, extent m, char *z, WRITEFUNC wfunc, void *param)
2025 { // write the end of the central-directory-data to file *f.
2026 PUTLG(ENDSIG, f);
2027 PUTSH(0, f);
2028 PUTSH(0, f);
2029 PUTSH(n, f);
2030 PUTSH(n, f);
2031 PUTLG(s, f);
2032 PUTLG(c, f);
2033 PUTSH(m, f);
2034 // Write the comment, if any
2035 if (m && wfunc(param, z, (unsigned int)m) != m) return ZE_TEMP;
2036 return ZE_OK;
2037 }
2038
2039
2040
2041
2042
2043
2044 const ulg crc_table[256] = {
2045 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
2046 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
2047 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
2048 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
2049 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
2050 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
2051 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
2052 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
2053 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
2054 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
2055 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
2056 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
2057 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
2058 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
2059 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
2060 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
2061 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
2062 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
2063 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
2064 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
2065 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
2066 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
2067 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
2068 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
2069 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
2070 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
2071 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
2072 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
2073 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
2074 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
2075 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
2076 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
2077 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
2078 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
2079 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
2080 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
2081 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
2082 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
2083 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
2084 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
2085 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
2086 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
2087 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
2088 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
2089 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
2090 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
2091 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
2092 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
2093 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
2094 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
2095 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
2096 0x2d02ef8dL
2097 };
2098
2099 #define CRC32(c, b) (crc_table[((int)(c) ^ (b)) & 0xff] ^ ((c) >> 8))
2100 #define DO1(buf) crc = CRC32(crc, *buf++)
2101 #define DO2(buf) DO1(buf); DO1(buf)
2102 #define DO4(buf) DO2(buf); DO2(buf)
2103 #define DO8(buf) DO4(buf); DO4(buf)
2104
crc32(ulg crc,const uch * buf,extent len)2105 ulg crc32(ulg crc, const uch *buf, extent len)
2106 { if (buf==NULL) return 0L;
2107 crc = crc ^ 0xffffffffL;
2108 while (len >= 8) {DO8(buf); len -= 8;}
2109 if (len) do {DO1(buf);} while (--len);
2110 return crc ^ 0xffffffffL; // (instead of ~c for 64-bit machines)
2111 }
2112
2113
update_keys(unsigned long * keys,char c)2114 void update_keys(unsigned long *keys, char c)
2115 { keys[0] = CRC32(keys[0],c);
2116 keys[1] += keys[0] & 0xFF;
2117 keys[1] = keys[1]*134775813L +1;
2118 keys[2] = CRC32(keys[2], keys[1] >> 24);
2119 }
decrypt_byte(unsigned long * keys)2120 char decrypt_byte(unsigned long *keys)
2121 { unsigned temp = ((unsigned)keys[2] & 0xffff) | 2;
2122 return (char)(((temp * (temp ^ 1)) >> 8) & 0xff);
2123 }
zencode(unsigned long * keys,char c)2124 char zencode(unsigned long *keys, char c)
2125 { int t=decrypt_byte(keys);
2126 update_keys(keys,c);
2127 return (char)(t^c);
2128 }
2129
2130
2131
2132
2133
2134
2135
HasZipSuffix(const TCHAR * fn)2136 bool HasZipSuffix(const TCHAR *fn)
2137 { const TCHAR *ext = fn+_tcslen(fn);
2138 while (ext>fn && *ext!='.') ext--;
2139 if (ext==fn && *ext!='.') return false;
2140 if (_tcsicmp(ext,_T(".Z"))==0) return true;
2141 if (_tcsicmp(ext,_T(".zip"))==0) return true;
2142 if (_tcsicmp(ext,_T(".zoo"))==0) return true;
2143 if (_tcsicmp(ext,_T(".arc"))==0) return true;
2144 if (_tcsicmp(ext,_T(".lzh"))==0) return true;
2145 if (_tcsicmp(ext,_T(".arj"))==0) return true;
2146 if (_tcsicmp(ext,_T(".gz"))==0) return true;
2147 if (_tcsicmp(ext,_T(".tgz"))==0) return true;
2148 return false;
2149 }
2150
2151
filetime2timet(const FILETIME ft)2152 lutime_t filetime2timet(const FILETIME ft)
2153 { __int64 i = *(__int64*)&ft;
2154 return (lutime_t)((i-116444736000000000LL)/10000000);
2155 }
2156
filetime2dosdatetime(const FILETIME ft,WORD * dosdate,WORD * dostime)2157 void filetime2dosdatetime(const FILETIME ft, WORD *dosdate,WORD *dostime)
2158 { // date: bits 0-4 are day of month 1-31. Bits 5-8 are month 1..12. Bits 9-15 are year-1980
2159 // time: bits 0-4 are seconds/2, bits 5-10 are minute 0..59. Bits 11-15 are hour 0..23
2160 SYSTEMTIME st; FileTimeToSystemTime(&ft,&st);
2161 *dosdate = (WORD)(((st.wYear-1980)&0x7f) << 9);
2162 *dosdate |= (WORD)((st.wMonth&0xf) << 5);
2163 *dosdate |= (WORD)((st.wDay&0x1f));
2164 *dostime = (WORD)((st.wHour&0x1f) << 11);
2165 *dostime |= (WORD)((st.wMinute&0x3f) << 5);
2166 *dostime |= (WORD)((st.wSecond*2)&0x1f);
2167 }
2168
2169
GetFileInfo(HANDLE hf,ulg * attr,long * size,iztimes * times,ulg * timestamp)2170 ZRESULT GetFileInfo(HANDLE hf, ulg *attr, long *size, iztimes *times, ulg *timestamp)
2171 { // The handle must be a handle to a file
2172 // The date and time is returned in a long with the date most significant to allow
2173 // unsigned integer comparison of absolute times. The attributes have two
2174 // high bytes unix attr, and two low bytes a mapping of that to DOS attr.
2175 //struct stat s; int res=stat(fn,&s); if (res!=0) return false;
2176 // translate windows file attributes into zip ones.
2177 BY_HANDLE_FILE_INFORMATION bhi; BOOL res=GetFileInformationByHandle(hf,&bhi);
2178 if (!res) return ZR_NOFILE;
2179 DWORD fa=bhi.dwFileAttributes; ulg a=0;
2180 // Zip uses the lower word for its interpretation of windows stuff
2181 if (fa&FILE_ATTRIBUTE_READONLY) a|=0x01;
2182 if (fa&FILE_ATTRIBUTE_HIDDEN) a|=0x02;
2183 if (fa&FILE_ATTRIBUTE_SYSTEM) a|=0x04;
2184 if (fa&FILE_ATTRIBUTE_DIRECTORY)a|=0x10;
2185 if (fa&FILE_ATTRIBUTE_ARCHIVE) a|=0x20;
2186 // It uses the upper word for standard unix attr, which we manually construct
2187 if (fa&FILE_ATTRIBUTE_DIRECTORY)a|=0x40000000; // directory
2188 else a|=0x80000000; // normal file
2189 a|=0x01000000; // readable
2190 if (fa&FILE_ATTRIBUTE_READONLY) {} else a|=0x00800000; // writeable
2191 // now just a small heuristic to check if it's an executable:
2192 DWORD red, hsize=GetFileSize(hf,NULL); if (hsize>40)
2193 { SetFilePointer(hf,0,NULL,FILE_BEGIN); unsigned short magic; ReadFile(hf,&magic,sizeof(magic),&red,NULL);
2194 SetFilePointer(hf,36,NULL,FILE_BEGIN); unsigned long hpos; ReadFile(hf,&hpos,sizeof(hpos),&red,NULL);
2195 if (magic==0x54AD && hsize>hpos+4+20+28)
2196 { SetFilePointer(hf,hpos,NULL,FILE_BEGIN); unsigned long signature; ReadFile(hf,&signature,sizeof(signature),&red,NULL);
2197 if (signature==IMAGE_DOS_SIGNATURE || signature==IMAGE_OS2_SIGNATURE
2198 || signature==IMAGE_OS2_SIGNATURE_LE || signature==IMAGE_NT_SIGNATURE)
2199 { a |= 0x00400000; // executable
2200 }
2201 }
2202 }
2203 //
2204 if (attr!=NULL) *attr = a;
2205 if (size!=NULL) *size = hsize;
2206 if (times!=NULL)
2207 { // lutime_t is 32bit number of seconds elapsed since 0:0:0GMT, Jan1, 1970.
2208 // but FILETIME is 64bit number of 100-nanosecs since Jan1, 1601
2209 times->atime = filetime2timet(bhi.ftLastAccessTime);
2210 times->mtime = filetime2timet(bhi.ftLastWriteTime);
2211 times->ctime = filetime2timet(bhi.ftCreationTime);
2212 }
2213 if (timestamp!=NULL)
2214 { WORD dosdate,dostime;
2215 filetime2dosdatetime(bhi.ftLastWriteTime,&dosdate,&dostime);
2216 *timestamp = (WORD)dostime | (((DWORD)dosdate)<<16);
2217 }
2218 return ZR_OK;
2219 }
2220
2221
2222
2223
2224
2225
2226
2227
2228 class TZip
2229 { public:
TZip(const char * pwd)2230 TZip(const char *pwd) : hfout(0),mustclosehfout(false),hmapout(0),zfis(0),obuf(0),hfin(0),writ(0),oerr(false),hasputcen(false),ooffset(0),encwriting(false),encbuf(0),password(0), state(0) {if (pwd!=0 && *pwd!=0) {password=new char[strlen(pwd)+1]; strcpy(password,pwd);}}
~TZip()2231 ~TZip() {if (state!=0) delete state; state=0; if (encbuf!=0) delete[] encbuf; encbuf=0; if (password!=0) delete[] password; password=0;}
2232
2233 // These variables say about the file we're writing into
2234 // We can write to pipe, file-by-handle, file-by-name, memory-to-memmapfile
2235 char *password; // keep a copy of the password
2236 HANDLE hfout; // if valid, we'll write here (for files or pipes)
2237 bool mustclosehfout; // if true, we are responsible for closing hfout
2238 HANDLE hmapout; // otherwise, we'll write here (for memmap)
2239 unsigned ooffset; // for hfout, this is where the pointer was initially
2240 ZRESULT oerr; // did a write operation give rise to an error?
2241 unsigned writ; // how far have we written. This is maintained by Add, not write(), to avoid confusion over seeks
2242 bool ocanseek; // can we seek?
2243 char *obuf; // this is where we've locked mmap to view.
2244 unsigned int opos; // current pos in the mmap
2245 unsigned int mapsize; // the size of the map we created
2246 bool hasputcen; // have we yet placed the central directory?
2247 bool encwriting; // if true, then we'll encrypt stuff using 'keys' before we write it to disk
2248 unsigned long keys[3]; // keys are initialised inside Add()
2249 char *encbuf; // if encrypting, then this is a temporary workspace for encrypting the data
2250 unsigned int encbufsize; // (to be used and resized inside write(), and deleted in the destructor)
2251 //
2252 TZipFileInfo *zfis; // each file gets added onto this list, for writing the table at the end
2253 TState *state; // we use just one state object per zip, because it's big (500k)
2254
2255 ZRESULT Create(void *z,unsigned int len,DWORD flags);
2256 static unsigned sflush(void *param,const char *buf, unsigned *size);
2257 static unsigned swrite(void *param,const char *buf, unsigned size);
2258 unsigned int write(const char *buf,unsigned int size);
2259 bool oseek(unsigned int pos);
2260 ZRESULT GetMemory(void **pbuf, unsigned long *plen);
2261 ZRESULT Close();
2262
2263 // some variables to do with the file currently being read:
2264 // I haven't done it object-orientedly here, just put them all
2265 // together, since OO didn't seem to make the design any clearer.
2266 ulg attr; iztimes times; ulg timestamp; // all open_* methods set these
2267 bool iseekable; long isize,ired; // size is not set until close() on pips
2268 ulg crc; // crc is not set until close(). iwrit is cumulative
2269 HANDLE hfin; bool selfclosehf; // for input files and pipes
2270 const char *bufin; unsigned int lenin,posin; // for memory
2271 // and a variable for what we've done with the input: (i.e. compressed it!)
2272 ulg csize; // compressed size, set by the compression routines
2273 // and this is used by some of the compression routines
2274 char buf[16384];
2275
2276
2277 ZRESULT open_file(const TCHAR *fn);
2278 ZRESULT open_handle(HANDLE hf,unsigned int len);
2279 ZRESULT open_mem(void *src,unsigned int len);
2280 ZRESULT open_dir();
2281 static unsigned sread(TState &s,char *buf,unsigned size);
2282 unsigned read(char *buf, unsigned size);
2283 ZRESULT iclose();
2284
2285 ZRESULT ideflate(TZipFileInfo *zfi);
2286 ZRESULT istore();
2287
2288 ZRESULT Add(const TCHAR *odstzn, void *src,unsigned int len, DWORD flags);
2289 ZRESULT AddCentral();
2290
2291 };
2292
2293
2294
Create(void * z,unsigned int len,DWORD flags)2295 ZRESULT TZip::Create(void *z,unsigned int len,DWORD flags)
2296 { if (hfout!=0 || hmapout!=0 || obuf!=0 || writ!=0 || oerr!=ZR_OK || hasputcen) return ZR_NOTINITED;
2297 //
2298 if (flags==ZIP_HANDLE)
2299 { HANDLE hf = (HANDLE)z;
2300 hfout=hf; mustclosehfout=false;
2301 #ifdef DuplicateHandle
2302 BOOL res = DuplicateHandle(GetCurrentProcess(),hf,GetCurrentProcess(),&hfout,0,FALSE,DUPLICATE_SAME_ACCESS);
2303 if (res) mustclosehandle=true;
2304 #endif
2305 // now we have hfout. Either we duplicated the handle and we close it ourselves
2306 // (while the caller closes h themselves), or we couldn't duplicate it.
2307 DWORD res = SetFilePointer(hfout,0,0,FILE_CURRENT);
2308 ocanseek = (res!=0xFFFFFFFF);
2309 if (ocanseek) ooffset=res; else ooffset=0;
2310 return ZR_OK;
2311 }
2312 else if (flags==ZIP_FILENAME)
2313 { const TCHAR *fn = (const TCHAR*)z;
2314 hfout = CreateFile(fn,GENERIC_WRITE,0,NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL);
2315 if (hfout==INVALID_HANDLE_VALUE) {hfout=0; return ZR_NOFILE;}
2316 ocanseek=true;
2317 ooffset=0;
2318 mustclosehfout=true;
2319 return ZR_OK;
2320 }
2321 else if (flags==ZIP_MEMORY)
2322 { unsigned int size = len;
2323 if (size==0) return ZR_MEMSIZE;
2324 if (z!=0) obuf=(char*)z;
2325 else
2326 { hmapout = CreateFileMapping(INVALID_HANDLE_VALUE,NULL,PAGE_READWRITE,0,size,NULL);
2327 if (hmapout==NULL) return ZR_NOALLOC;
2328 obuf = (char*)MapViewOfFile(hmapout,FILE_MAP_ALL_ACCESS,0,0,size);
2329 if (obuf==0) {CloseHandle(hmapout); hmapout=0; return ZR_NOALLOC;}
2330 }
2331 ocanseek=true;
2332 opos=0; mapsize=size;
2333 return ZR_OK;
2334 }
2335 else return ZR_ARGS;
2336 }
2337
sflush(void * param,const char * buf,unsigned * size)2338 unsigned TZip::sflush(void *param,const char *buf, unsigned *size)
2339 { // static
2340 if (*size==0) return 0;
2341 TZip *zip = (TZip*)param;
2342 unsigned int writ = zip->write(buf,*size);
2343 if (writ!=0) *size=0;
2344 return writ;
2345 }
swrite(void * param,const char * buf,unsigned size)2346 unsigned TZip::swrite(void *param,const char *buf, unsigned size)
2347 { // static
2348 if (size==0) return 0;
2349 TZip *zip=(TZip*)param; return zip->write(buf,size);
2350 }
write(const char * buf,unsigned int size)2351 unsigned int TZip::write(const char *buf,unsigned int size)
2352 { const char *srcbuf=buf;
2353 if (encwriting)
2354 { if (encbuf!=0 && encbufsize<size) {delete[] encbuf; encbuf=0;}
2355 if (encbuf==0) {encbuf=new char[size*2]; encbufsize=size;}
2356 memcpy(encbuf,buf,size);
2357 for (unsigned int i=0; i<size; i++) encbuf[i]=zencode(keys,encbuf[i]);
2358 srcbuf=encbuf;
2359 }
2360 if (obuf!=0)
2361 { if (opos+size>=mapsize) {oerr=ZR_MEMSIZE; return 0;}
2362 memcpy(obuf+opos, srcbuf, size);
2363 opos+=size;
2364 return size;
2365 }
2366 else if (hfout!=0)
2367 { DWORD writ; WriteFile(hfout,srcbuf,size,&writ,NULL);
2368 return writ;
2369 }
2370 oerr=ZR_NOTINITED; return 0;
2371 }
2372
oseek(unsigned int pos)2373 bool TZip::oseek(unsigned int pos)
2374 { if (!ocanseek) {oerr=ZR_SEEK; return false;}
2375 if (obuf!=0)
2376 { if (pos>=mapsize) {oerr=ZR_MEMSIZE; return false;}
2377 opos=pos;
2378 return true;
2379 }
2380 else if (hfout!=0)
2381 { SetFilePointer(hfout,pos+ooffset,NULL,FILE_BEGIN);
2382 return true;
2383 }
2384 oerr=ZR_NOTINITED; return 0;
2385 }
2386
GetMemory(void ** pbuf,unsigned long * plen)2387 ZRESULT TZip::GetMemory(void **pbuf, unsigned long *plen)
2388 { // When the user calls GetMemory, they're presumably at the end
2389 // of all their adding. In any case, we have to add the central
2390 // directory now, otherwise the memory we tell them won't be complete.
2391 if (!hasputcen) AddCentral(); hasputcen=true;
2392 if (pbuf!=NULL) *pbuf=(void*)obuf;
2393 if (plen!=NULL) *plen=writ;
2394 if (obuf==NULL) return ZR_NOTMMAP;
2395 return ZR_OK;
2396 }
2397
Close()2398 ZRESULT TZip::Close()
2399 { // if the directory hadn't already been added through a call to GetMemory,
2400 // then we do it now
2401 ZRESULT res=ZR_OK; if (!hasputcen) res=AddCentral(); hasputcen=true;
2402 if (obuf!=0 && hmapout!=0) UnmapViewOfFile(obuf); obuf=0;
2403 if (hmapout!=0) CloseHandle(hmapout); hmapout=0;
2404 if (hfout!=0 && mustclosehfout) CloseHandle(hfout); hfout=0; mustclosehfout=false;
2405 return res;
2406 }
2407
2408
2409
2410
open_file(const TCHAR * fn)2411 ZRESULT TZip::open_file(const TCHAR *fn)
2412 { hfin=0; bufin=0; selfclosehf=false; crc=CRCVAL_INITIAL; isize=0; csize=0; ired=0;
2413 if (fn==0) return ZR_ARGS;
2414 HANDLE hf = CreateFile(fn,GENERIC_READ,FILE_SHARE_READ,NULL,OPEN_EXISTING,0,NULL);
2415 if (hf==INVALID_HANDLE_VALUE) return ZR_NOFILE;
2416 ZRESULT res = open_handle(hf,0);
2417 if (res!=ZR_OK) {CloseHandle(hf); return res;}
2418 selfclosehf=true;
2419 return ZR_OK;
2420 }
open_handle(HANDLE hf,unsigned int len)2421 ZRESULT TZip::open_handle(HANDLE hf,unsigned int len)
2422 { hfin=0; bufin=0; selfclosehf=false; crc=CRCVAL_INITIAL; isize=0; csize=0; ired=0;
2423 if (hf==0 || hf==INVALID_HANDLE_VALUE) return ZR_ARGS;
2424 DWORD res = SetFilePointer(hfout,0,0,FILE_CURRENT);
2425 if (res!=0xFFFFFFFF)
2426 { ZRESULT res = GetFileInfo(hf,&attr,&isize,×,×tamp);
2427 if (res!=ZR_OK) return res;
2428 SetFilePointer(hf,0,NULL,FILE_BEGIN); // because GetFileInfo will have screwed it up
2429 iseekable=true; hfin=hf;
2430 return ZR_OK;
2431 }
2432 else
2433 { attr= 0x80000000; // just a normal file
2434 isize = -1; // can't know size until at the end
2435 if (len!=0) isize=len; // unless we were told explicitly!
2436 iseekable=false;
2437 SYSTEMTIME st; GetLocalTime(&st);
2438 FILETIME ft; SystemTimeToFileTime(&st,&ft);
2439 WORD dosdate,dostime; filetime2dosdatetime(ft,&dosdate,&dostime);
2440 times.atime = filetime2timet(ft);
2441 times.mtime = times.atime;
2442 times.ctime = times.atime;
2443 timestamp = (WORD)dostime | (((DWORD)dosdate)<<16);
2444 hfin=hf;
2445 return ZR_OK;
2446 }
2447 }
open_mem(void * src,unsigned int len)2448 ZRESULT TZip::open_mem(void *src,unsigned int len)
2449 { hfin=0; bufin=(const char*)src; selfclosehf=false; crc=CRCVAL_INITIAL; ired=0; csize=0; ired=0;
2450 lenin=len; posin=0;
2451 if (src==0 || len==0) return ZR_ARGS;
2452 attr= 0x80000000; // just a normal file
2453 isize = len;
2454 iseekable=true;
2455 SYSTEMTIME st; GetLocalTime(&st);
2456 FILETIME ft; SystemTimeToFileTime(&st,&ft);
2457 WORD dosdate,dostime; filetime2dosdatetime(ft,&dosdate,&dostime);
2458 times.atime = filetime2timet(ft);
2459 times.mtime = times.atime;
2460 times.ctime = times.atime;
2461 timestamp = (WORD)dostime | (((DWORD)dosdate)<<16);
2462 return ZR_OK;
2463 }
open_dir()2464 ZRESULT TZip::open_dir()
2465 { hfin=0; bufin=0; selfclosehf=false; crc=CRCVAL_INITIAL; isize=0; csize=0; ired=0;
2466 attr= 0x41C00010; // a readable writable directory, and again directory
2467 isize = 0;
2468 iseekable=false;
2469 SYSTEMTIME st; GetLocalTime(&st);
2470 FILETIME ft; SystemTimeToFileTime(&st,&ft);
2471 WORD dosdate,dostime; filetime2dosdatetime(ft,&dosdate,&dostime);
2472 times.atime = filetime2timet(ft);
2473 times.mtime = times.atime;
2474 times.ctime = times.atime;
2475 timestamp = (WORD)dostime | (((DWORD)dosdate)<<16);
2476 return ZR_OK;
2477 }
2478
sread(TState & s,char * buf,unsigned size)2479 unsigned TZip::sread(TState &s,char *buf,unsigned size)
2480 { // static
2481 TZip *zip = (TZip*)s.param;
2482 return zip->read(buf,size);
2483 }
2484
read(char * buf,unsigned size)2485 unsigned TZip::read(char *buf, unsigned size)
2486 { if (bufin!=0)
2487 { if (posin>=lenin) return 0; // end of input
2488 ulg red = lenin-posin;
2489 if (red>size) red=size;
2490 memcpy(buf, bufin+posin, red);
2491 posin += red;
2492 ired += red;
2493 crc = crc32(crc, (uch*)buf, red);
2494 return red;
2495 }
2496 else if (hfin!=0)
2497 { DWORD red;
2498 BOOL ok = ReadFile(hfin,buf,size,&red,NULL);
2499 if (!ok) return 0;
2500 ired += red;
2501 crc = crc32(crc, (uch*)buf, red);
2502 return red;
2503 }
2504 else {oerr=ZR_NOTINITED; return 0;}
2505 }
2506
iclose()2507 ZRESULT TZip::iclose()
2508 { if (selfclosehf && hfin!=0) CloseHandle(hfin); hfin=0;
2509 bool mismatch = (isize!=-1 && isize!=ired);
2510 isize=ired; // and crc has been being updated anyway
2511 if (mismatch) return ZR_MISSIZE;
2512 else return ZR_OK;
2513 }
2514
2515
2516
ideflate(TZipFileInfo * zfi)2517 ZRESULT TZip::ideflate(TZipFileInfo *zfi)
2518 { if (state==0) state=new TState();
2519 // It's a very big object! 500k! We allocate it on the heap, because PocketPC's
2520 // stack breaks if we try to put it all on the stack. It will be deleted lazily
2521 state->err=0;
2522 state->readfunc=sread; state->flush_outbuf=sflush;
2523 state->param=this; state->level=8; state->seekable=iseekable; state->err=NULL;
2524 // the following line will make ct_init realise it has to perform the init
2525 state->ts.static_dtree[0].dl.len = 0;
2526 // Thanks to Alvin77 for this crucial fix:
2527 state->ds.window_size=0;
2528 // I think that covers everything that needs to be initted.
2529 //
2530 bi_init(*state,buf, sizeof(buf), TRUE); // it used to be just 1024-size, not 16384 as here
2531 ct_init(*state,&zfi->att);
2532 lm_init(*state,state->level, &zfi->flg);
2533 ulg sz = deflate(*state);
2534 csize=sz;
2535 ZRESULT r=ZR_OK; if (state->err!=NULL) r=ZR_FLATE;
2536 return r;
2537 }
2538
istore()2539 ZRESULT TZip::istore()
2540 { ulg size=0;
2541 for (;;)
2542 { unsigned int cin=read(buf,16384); if (cin<=0 || cin==(unsigned int)EOF) break;
2543 unsigned int cout = write(buf,cin); if (cout!=cin) return ZR_MISSIZE;
2544 size += cin;
2545 }
2546 csize=size;
2547 return ZR_OK;
2548 }
2549
2550
2551
2552
2553
2554 bool has_seeded=false;
Add(const TCHAR * odstzn,void * src,unsigned int len,DWORD flags)2555 ZRESULT TZip::Add(const TCHAR *odstzn, void *src,unsigned int len, DWORD flags)
2556 { if (oerr) return ZR_FAILED;
2557 if (hasputcen) return ZR_ENDED;
2558
2559 // if we use password encryption, then every isize and csize is 12 bytes bigger
2560 int passex=0; if (password!=0 && flags!=ZIP_FOLDER) passex=12;
2561
2562 // zip has its own notion of what its names should look like: i.e. dir/file.stuff
2563 TCHAR dstzn[MAX_PATH]; _tcscpy(dstzn,odstzn);
2564 if (*dstzn==0) return ZR_ARGS;
2565 TCHAR *d=dstzn; while (*d!=0) {if (*d=='\\') *d='/'; d++;}
2566 bool isdir = (flags==ZIP_FOLDER);
2567 bool needs_trailing_slash = (isdir && dstzn[_tcslen(dstzn)-1]!='/');
2568 int method=DEFLATE; if (isdir || HasZipSuffix(dstzn)) method=STORE;
2569
2570 // now open whatever was our input source:
2571 ZRESULT openres;
2572 if (flags==ZIP_FILENAME) openres=open_file((const TCHAR*)src);
2573 else if (flags==ZIP_HANDLE) openres=open_handle((HANDLE)src,len);
2574 else if (flags==ZIP_MEMORY) openres=open_mem(src,len);
2575 else if (flags==ZIP_FOLDER) openres=open_dir();
2576 else return ZR_ARGS;
2577 if (openres!=ZR_OK) return openres;
2578
2579 // A zip "entry" consists of a local header (which includes the file name),
2580 // then the compressed data, and possibly an extended local header.
2581
2582 // Initialize the local header
2583 TZipFileInfo zfi; zfi.nxt=NULL;
2584 strcpy(zfi.name,"");
2585 #ifdef UNICODE
2586 WideCharToMultiByte(CP_UTF8,0,dstzn,-1,zfi.iname,MAX_PATH,0,0);
2587 #else
2588 strcpy(zfi.iname,dstzn);
2589 #endif
2590 zfi.nam=strlen(zfi.iname);
2591 if (needs_trailing_slash) {strcat(zfi.iname,"/"); zfi.nam++;}
2592 strcpy(zfi.zname,"");
2593 zfi.extra=NULL; zfi.ext=0; // extra header to go after this compressed data, and its length
2594 zfi.cextra=NULL; zfi.cext=0; // extra header to go in the central end-of-zip directory, and its length
2595 zfi.comment=NULL; zfi.com=0; // comment, and its length
2596 zfi.mark = 1;
2597 zfi.dosflag = 0;
2598 zfi.att = (ush)BINARY;
2599 zfi.vem = (ush)0xB17; // 0xB00 is win32 os-code. 0x17 is 23 in decimal: zip 2.3
2600 zfi.ver = (ush)20; // Needs PKUNZIP 2.0 to unzip it
2601 zfi.tim = timestamp;
2602 // Even though we write the header now, it will have to be rewritten, since we don't know compressed size or crc.
2603 zfi.crc = 0; // to be updated later
2604 zfi.flg = 8; // 8 means 'there is an extra header'. Assume for the moment that we need it.
2605 if (password!=0 && !isdir) zfi.flg=9; // and 1 means 'password-encrypted'
2606 zfi.lflg = zfi.flg; // to be updated later
2607 zfi.how = (ush)method; // to be updated later
2608 zfi.siz = (ulg)(method==STORE && isize>=0 ? isize+passex : 0); // to be updated later
2609 zfi.len = (ulg)(isize); // to be updated later
2610 zfi.dsk = 0;
2611 zfi.atx = attr;
2612 zfi.off = writ+ooffset; // offset within file of the start of this local record
2613 // stuff the 'times' structure into zfi.extra
2614
2615 // nb. apparently there's a problem with PocketPC CE(zip)->CE(unzip) fails. And removing the following block fixes it up.
2616 char xloc[EB_L_UT_SIZE]; zfi.extra=xloc; zfi.ext=EB_L_UT_SIZE;
2617 char xcen[EB_C_UT_SIZE]; zfi.cextra=xcen; zfi.cext=EB_C_UT_SIZE;
2618 xloc[0] = 'U';
2619 xloc[1] = 'T';
2620 xloc[2] = EB_UT_LEN(3); // length of data part of e.f.
2621 xloc[3] = 0;
2622 xloc[4] = EB_UT_FL_MTIME | EB_UT_FL_ATIME | EB_UT_FL_CTIME;
2623 xloc[5] = (char)(times.mtime);
2624 xloc[6] = (char)(times.mtime >> 8);
2625 xloc[7] = (char)(times.mtime >> 16);
2626 xloc[8] = (char)(times.mtime >> 24);
2627 xloc[9] = (char)(times.atime);
2628 xloc[10] = (char)(times.atime >> 8);
2629 xloc[11] = (char)(times.atime >> 16);
2630 xloc[12] = (char)(times.atime >> 24);
2631 xloc[13] = (char)(times.ctime);
2632 xloc[14] = (char)(times.ctime >> 8);
2633 xloc[15] = (char)(times.ctime >> 16);
2634 xloc[16] = (char)(times.ctime >> 24);
2635 memcpy(zfi.cextra,zfi.extra,EB_C_UT_SIZE);
2636 zfi.cextra[EB_LEN] = EB_UT_LEN(1);
2637
2638
2639 // (1) Start by writing the local header:
2640 int r = putlocal(&zfi,swrite,this);
2641 if (r!=ZE_OK) {iclose(); return ZR_WRITE;}
2642 writ += 4 + LOCHEAD + (unsigned int)zfi.nam + (unsigned int)zfi.ext;
2643 if (oerr!=ZR_OK) {iclose(); return oerr;}
2644
2645 // (1.5) if necessary, write the encryption header
2646 keys[0]=305419896L;
2647 keys[1]=591751049L;
2648 keys[2]=878082192L;
2649 for (const char *cp=password; cp!=0 && *cp!=0; cp++) update_keys(keys,*cp);
2650 // generate some random bytes
2651 if (!has_seeded) srand(GetTickCount()^(unsigned long)GetDesktopWindow());
2652 char encbuf[12]; for (int i=0; i<12; i++) encbuf[i]=(char)((rand()>>7)&0xff);
2653 encbuf[11] = (char)((zfi.tim>>8)&0xff);
2654 for (int ei=0; ei<12; ei++) encbuf[ei]=zencode(keys,encbuf[ei]);
2655 if (password!=0 && !isdir) {swrite(this,encbuf,12); writ+=12;}
2656
2657 //(2) Write deflated/stored file to zip file
2658 ZRESULT writeres=ZR_OK;
2659 encwriting = (password!=0 && !isdir); // an object member variable to say whether we write to disk encrypted
2660 if (!isdir && method==DEFLATE) writeres=ideflate(&zfi);
2661 else if (!isdir && method==STORE) writeres=istore();
2662 else if (isdir) csize=0;
2663 encwriting = false;
2664 iclose();
2665 writ += csize;
2666 if (oerr!=ZR_OK) return oerr;
2667 if (writeres!=ZR_OK) return ZR_WRITE;
2668
2669 // (3) Either rewrite the local header with correct information...
2670 bool first_header_has_size_right = (zfi.siz==csize+passex);
2671 zfi.crc = crc;
2672 zfi.siz = csize+passex;
2673 zfi.len = isize;
2674 if (ocanseek && (password==0 || isdir))
2675 { zfi.how = (ush)method;
2676 if ((zfi.flg & 1) == 0) zfi.flg &= ~8; // clear the extended local header flag
2677 zfi.lflg = zfi.flg;
2678 // rewrite the local header:
2679 if (!oseek(zfi.off-ooffset)) return ZR_SEEK;
2680 if ((r = putlocal(&zfi, swrite,this)) != ZE_OK) return ZR_WRITE;
2681 if (!oseek(writ)) return ZR_SEEK;
2682 }
2683 else
2684 { // (4) ... or put an updated header at the end
2685 if (zfi.how != (ush) method) return ZR_NOCHANGE;
2686 if (method==STORE && !first_header_has_size_right) return ZR_NOCHANGE;
2687 if ((r = putextended(&zfi, swrite,this)) != ZE_OK) return ZR_WRITE;
2688 writ += 16L;
2689 zfi.flg = zfi.lflg; // if flg modified by inflate, for the central index
2690 }
2691 if (oerr!=ZR_OK) return oerr;
2692
2693 // Keep a copy of the zipfileinfo, for our end-of-zip directory
2694 char *cextra = new char[zfi.cext]; memcpy(cextra,zfi.cextra,zfi.cext); zfi.cextra=cextra;
2695 TZipFileInfo *pzfi = new TZipFileInfo; memcpy(pzfi,&zfi,sizeof(zfi));
2696 if (zfis==NULL) zfis=pzfi;
2697 else {TZipFileInfo *z=zfis; while (z->nxt!=NULL) z=z->nxt; z->nxt=pzfi;}
2698 return ZR_OK;
2699 }
2700
AddCentral()2701 ZRESULT TZip::AddCentral()
2702 { // write central directory
2703 int numentries = 0;
2704 ulg pos_at_start_of_central = writ;
2705 //ulg tot_unc_size=0, tot_compressed_size=0;
2706 bool okay=true;
2707 for (TZipFileInfo *zfi=zfis; zfi!=NULL; )
2708 { if (okay)
2709 { int res = putcentral(zfi, swrite,this);
2710 if (res!=ZE_OK) okay=false;
2711 }
2712 writ += 4 + CENHEAD + (unsigned int)zfi->nam + (unsigned int)zfi->cext + (unsigned int)zfi->com;
2713 //tot_unc_size += zfi->len;
2714 //tot_compressed_size += zfi->siz;
2715 numentries++;
2716 //
2717 TZipFileInfo *zfinext = zfi->nxt;
2718 if (zfi->cextra!=0) delete[] zfi->cextra;
2719 delete zfi;
2720 zfi = zfinext;
2721 }
2722 ulg center_size = writ - pos_at_start_of_central;
2723 if (okay)
2724 { int res = putend(numentries, center_size, pos_at_start_of_central+ooffset, 0, NULL, swrite,this);
2725 if (res!=ZE_OK) okay=false;
2726 writ += 4 + ENDHEAD + 0;
2727 }
2728 if (!okay) return ZR_WRITE;
2729 return ZR_OK;
2730 }
2731
2732
2733
2734
2735
2736 ZRESULT lasterrorZ=ZR_OK;
2737
FormatZipMessageZ(ZRESULT code,char * buf,unsigned int len)2738 unsigned int FormatZipMessageZ(ZRESULT code, char *buf,unsigned int len)
2739 { if (code==ZR_RECENT) code=lasterrorZ;
2740 const char *msg="unknown zip result code";
2741 switch (code)
2742 { case ZR_OK: msg="Success"; break;
2743 case ZR_NODUPH: msg="Culdn't duplicate handle"; break;
2744 case ZR_NOFILE: msg="Couldn't create/open file"; break;
2745 case ZR_NOALLOC: msg="Failed to allocate memory"; break;
2746 case ZR_WRITE: msg="Error writing to file"; break;
2747 case ZR_NOTFOUND: msg="File not found in the zipfile"; break;
2748 case ZR_MORE: msg="Still more data to unzip"; break;
2749 case ZR_CORRUPT: msg="Zipfile is corrupt or not a zipfile"; break;
2750 case ZR_READ: msg="Error reading file"; break;
2751 case ZR_ARGS: msg="Caller: faulty arguments"; break;
2752 case ZR_PARTIALUNZ: msg="Caller: the file had already been partially unzipped"; break;
2753 case ZR_NOTMMAP: msg="Caller: can only get memory of a memory zipfile"; break;
2754 case ZR_MEMSIZE: msg="Caller: not enough space allocated for memory zipfile"; break;
2755 case ZR_FAILED: msg="Caller: there was a previous error"; break;
2756 case ZR_ENDED: msg="Caller: additions to the zip have already been ended"; break;
2757 case ZR_ZMODE: msg="Caller: mixing creation and opening of zip"; break;
2758 case ZR_NOTINITED: msg="Zip-bug: internal initialisation not completed"; break;
2759 case ZR_SEEK: msg="Zip-bug: trying to seek the unseekable"; break;
2760 case ZR_MISSIZE: msg="Zip-bug: the anticipated size turned out wrong"; break;
2761 case ZR_NOCHANGE: msg="Zip-bug: tried to change mind, but not allowed"; break;
2762 case ZR_FLATE: msg="Zip-bug: an internal error during flation"; break;
2763 }
2764 unsigned int mlen=(unsigned int)strlen(msg);
2765 if (buf==0 || len==0) return mlen;
2766 unsigned int n=mlen; if (n+1>len) n=len-1;
2767 strncpy(buf,msg,n); buf[n]=0;
2768 return mlen;
2769 }
2770
2771
2772
2773 typedef struct
2774 { DWORD flag;
2775 TZip *zip;
2776 } TZipHandleData;
2777
2778
CreateZipInternal(void * z,unsigned int len,DWORD flags,const char * password)2779 HZIP CreateZipInternal(void *z,unsigned int len,DWORD flags, const char *password)
2780 { TZip *zip = new TZip(password);
2781 lasterrorZ = zip->Create(z,len,flags);
2782 if (lasterrorZ!=ZR_OK) {delete zip; return 0;}
2783 TZipHandleData *han = new TZipHandleData;
2784 han->flag=2; han->zip=zip; return (HZIP)han;
2785 }
CreateZipHandle(HANDLE h,const char * password)2786 HZIP CreateZipHandle(HANDLE h, const char *password) {return CreateZipInternal(h,0,ZIP_HANDLE,password);}
CreateZip(const TCHAR * fn,const char * password)2787 HZIP CreateZip(const TCHAR *fn, const char *password) {return CreateZipInternal((void*)fn,0,ZIP_FILENAME,password);}
CreateZip(void * z,unsigned int len,const char * password)2788 HZIP CreateZip(void *z,unsigned int len, const char *password) {return CreateZipInternal(z,len,ZIP_MEMORY,password);}
2789
2790
ZipAddInternal(HZIP hz,const TCHAR * dstzn,void * src,unsigned int len,DWORD flags)2791 ZRESULT ZipAddInternal(HZIP hz,const TCHAR *dstzn, void *src,unsigned int len, DWORD flags)
2792 { if (hz==0) {lasterrorZ=ZR_ARGS;return ZR_ARGS;}
2793 TZipHandleData *han = (TZipHandleData*)hz;
2794 if (han->flag!=2) {lasterrorZ=ZR_ZMODE;return ZR_ZMODE;}
2795 TZip *zip = han->zip;
2796 lasterrorZ = zip->Add(dstzn,src,len,flags);
2797 return lasterrorZ;
2798 }
ZipAdd(HZIP hz,const TCHAR * dstzn,const TCHAR * fn)2799 ZRESULT ZipAdd(HZIP hz,const TCHAR *dstzn, const TCHAR *fn) {return ZipAddInternal(hz,dstzn,(void*)fn,0,ZIP_FILENAME);}
ZipAdd(HZIP hz,const TCHAR * dstzn,void * src,unsigned int len)2800 ZRESULT ZipAdd(HZIP hz,const TCHAR *dstzn, void *src,unsigned int len) {return ZipAddInternal(hz,dstzn,src,len,ZIP_MEMORY);}
ZipAddHandle(HZIP hz,const TCHAR * dstzn,HANDLE h)2801 ZRESULT ZipAddHandle(HZIP hz,const TCHAR *dstzn, HANDLE h) {return ZipAddInternal(hz,dstzn,h,0,ZIP_HANDLE);}
ZipAddHandle(HZIP hz,const TCHAR * dstzn,HANDLE h,unsigned int len)2802 ZRESULT ZipAddHandle(HZIP hz,const TCHAR *dstzn, HANDLE h, unsigned int len) {return ZipAddInternal(hz,dstzn,h,len,ZIP_HANDLE);}
ZipAddFolder(HZIP hz,const TCHAR * dstzn)2803 ZRESULT ZipAddFolder(HZIP hz,const TCHAR *dstzn) {return ZipAddInternal(hz,dstzn,0,0,ZIP_FOLDER);}
2804
2805
2806
ZipGetMemory(HZIP hz,void ** buf,unsigned long * len)2807 ZRESULT ZipGetMemory(HZIP hz, void **buf, unsigned long *len)
2808 { if (hz==0) {if (buf!=0) *buf=0; if (len!=0) *len=0; lasterrorZ=ZR_ARGS;return ZR_ARGS;}
2809 TZipHandleData *han = (TZipHandleData*)hz;
2810 if (han->flag!=2) {lasterrorZ=ZR_ZMODE;return ZR_ZMODE;}
2811 TZip *zip = han->zip;
2812 lasterrorZ = zip->GetMemory(buf,len);
2813 return lasterrorZ;
2814 }
2815
CloseZipZ(HZIP hz)2816 ZRESULT CloseZipZ(HZIP hz)
2817 { if (hz==0) {lasterrorZ=ZR_ARGS;return ZR_ARGS;}
2818 TZipHandleData *han = (TZipHandleData*)hz;
2819 if (han->flag!=2) {lasterrorZ=ZR_ZMODE;return ZR_ZMODE;}
2820 TZip *zip = han->zip;
2821 lasterrorZ = zip->Close();
2822 delete zip;
2823 delete han;
2824 return lasterrorZ;
2825 }
2826
IsZipHandleZ(HZIP hz)2827 bool IsZipHandleZ(HZIP hz)
2828 { if (hz==0) return false;
2829 TZipHandleData *han = (TZipHandleData*)hz;
2830 return (han->flag==2);
2831 }
2832
2833 #endif //WIN32
2834
2835