1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 /*
21 Functions for basic LZ77 compression and utilities for the "squeeze" LZ77
22 compression.
23 */
24 
25 #ifndef ZOPFLI_LZ77_H_
26 #define ZOPFLI_LZ77_H_
27 
28 #include <stdlib.h>
29 
30 #include "cache.h"
31 #include "hash.h"
32 #include "zopfli.h"
33 
34 /*
35 Stores lit/length and dist pairs for LZ77.
36 Parameter litlens: Contains the literal symbols or length values.
37 Parameter dists: Contains the distances. A value is 0 to indicate that there is
38 no dist and the corresponding litlens value is a literal instead of a length.
39 Parameter size: The size of both the litlens and dists arrays.
40 The memory can best be managed by using ZopfliInitLZ77Store to initialize it,
41 ZopfliCleanLZ77Store to destroy it, and ZopfliStoreLitLenDist to append values.
42 
43 */
44 typedef struct ZopfliLZ77Store {
45   unsigned short* litlens;  /* Lit or len. */
46   unsigned short* dists;  /* If 0: indicates literal in corresponding litlens,
47       if > 0: length in corresponding litlens, this is the distance. */
48   size_t size;
49 
50   const unsigned char* data;  /* original data */
51   size_t* pos;  /* position in data where this LZ77 command begins */
52 
53   unsigned short* ll_symbol;
54   unsigned short* d_symbol;
55 
56   /* Cumulative histograms wrapping around per chunk. Each chunk has the amount
57   of distinct symbols as length, so using 1 value per LZ77 symbol, we have a
58   precise histogram at every N symbols, and the rest can be calculated by
59   looping through the actual symbols of this chunk. */
60   size_t* ll_counts;
61   size_t* d_counts;
62 } ZopfliLZ77Store;
63 
64 void ZopfliInitLZ77Store(const unsigned char* data, ZopfliLZ77Store* store);
65 void ZopfliCleanLZ77Store(ZopfliLZ77Store* store);
66 void ZopfliCopyLZ77Store(const ZopfliLZ77Store* source, ZopfliLZ77Store* dest);
67 void ZopfliStoreLitLenDist(unsigned short length, unsigned short dist,
68                            size_t pos, ZopfliLZ77Store* store);
69 void ZopfliAppendLZ77Store(const ZopfliLZ77Store* store,
70                            ZopfliLZ77Store* target);
71 /* Gets the amount of raw bytes that this range of LZ77 symbols spans. */
72 size_t ZopfliLZ77GetByteRange(const ZopfliLZ77Store* lz77,
73                               size_t lstart, size_t lend);
74 /* Gets the histogram of lit/len and dist symbols in the given range, using the
75 cumulative histograms, so faster than adding one by one for large range. Does
76 not add the one end symbol of value 256. */
77 void ZopfliLZ77GetHistogram(const ZopfliLZ77Store* lz77,
78                             size_t lstart, size_t lend,
79                             size_t* ll_counts, size_t* d_counts);
80 
81 /*
82 Some state information for compressing a block.
83 This is currently a bit under-used (with mainly only the longest match cache),
84 but is kept for easy future expansion.
85 */
86 typedef struct ZopfliBlockState {
87   const ZopfliOptions* options;
88 
89 #ifdef ZOPFLI_LONGEST_MATCH_CACHE
90   /* Cache for length/distance pairs found so far. */
91   ZopfliLongestMatchCache* lmc;
92 #endif
93 
94   /* The start (inclusive) and end (not inclusive) of the current block. */
95   size_t blockstart;
96   size_t blockend;
97 } ZopfliBlockState;
98 
99 void ZopfliInitBlockState(const ZopfliOptions* options,
100                           size_t blockstart, size_t blockend, int add_lmc,
101                           ZopfliBlockState* s);
102 void ZopfliCleanBlockState(ZopfliBlockState* s);
103 
104 /*
105 Finds the longest match (length and corresponding distance) for LZ77
106 compression.
107 Even when not using "sublen", it can be more efficient to provide an array,
108 because only then the caching is used.
109 array: the data
110 pos: position in the data to find the match for
111 size: size of the data
112 limit: limit length to maximum this value (default should be 258). This allows
113     finding a shorter dist for that length (= less extra bits). Must be
114     in the range [ZOPFLI_MIN_MATCH, ZOPFLI_MAX_MATCH].
115 sublen: output array of 259 elements, or null. Has, for each length, the
116     smallest distance required to reach this length. Only 256 of its 259 values
117     are used, the first 3 are ignored (the shortest length is 3. It is purely
118     for convenience that the array is made 3 longer).
119 */
120 void ZopfliFindLongestMatch(
121     ZopfliBlockState *s, const ZopfliHash* h, const unsigned char* array,
122     size_t pos, size_t size, size_t limit,
123     unsigned short* sublen, unsigned short* distance, unsigned short* length);
124 
125 /*
126 Verifies if length and dist are indeed valid, only used for assertion.
127 */
128 void ZopfliVerifyLenDist(const unsigned char* data, size_t datasize, size_t pos,
129                          unsigned short dist, unsigned short length);
130 
131 /*
132 Does LZ77 using an algorithm similar to gzip, with lazy matching, rather than
133 with the slow but better "squeeze" implementation.
134 The result is placed in the ZopfliLZ77Store.
135 If instart is larger than 0, it uses values before instart as starting
136 dictionary.
137 */
138 void ZopfliLZ77Greedy(ZopfliBlockState* s, const unsigned char* in,
139                       size_t instart, size_t inend,
140                       ZopfliLZ77Store* store, ZopfliHash* h);
141 
142 #endif  /* ZOPFLI_LZ77_H_ */
143