1 /* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007,
3 * 2008, 2009, 2010. Joshua P. MacDonald
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 /* To know more about Xdelta, start by reading xdelta3.c. If you are
21 * ready to use the API, continue reading here. There are two
22 * interfaces -- xd3_encode_input and xd3_decode_input -- plus a dozen
23 * or so related calls. This interface is styled after Zlib. */
24
25 #ifndef _XDELTA3_H_
26 #define _XDELTA3_H_
27
28 #ifndef XD3_USE_LARGEFILE64
29 #define XD3_USE_LARGEFILE64 1
30 #endif
31
32 #ifdef _WIN32
33 #ifndef WIN32_LEAN_AND_MEAN
34 # define WIN32_LEAN_AND_MEAN
35 #endif
36 /* the version defs below are not necessary for win64 */
37 #if !defined(_WIN64)
38 #if XD3_USE_LARGEFILE64
39 /* 64 bit file offsets: uses GetFileSizeEx and SetFilePointerEx.
40 * requires Win2000 or newer version of WinNT */
41 #define WINVER 0x0502
42 #define _WIN32_WINNT 0x0502
43 #else
44 /* 32 bit (DWORD) file offsets: uses GetFileSize and
45 * SetFilePointer. compatible with win9x-me and WinNT4 */
46 #define WINVER 0x0400
47 #define _WIN32_WINNT 0x0400
48 #endif
49 #endif /* !_WIN64 */
50 #endif /* _WIN32 */
51
52 #include <stddef.h>
53 #include <limits.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <sys/types.h>
57
58 /****************************************************************/
59
60 /* Sizes and addresses within VCDIFF windows are represented as usize_t
61 *
62 * For source-file offsets and total file sizes, total input and
63 * output counts, the xoff_t type is used. The decoder and encoder
64 * generally check for overflow of the xoff_t size (this is tested at
65 * the 32bit boundary [xdelta3-test.h]).
66 */
67 #include "xdelta3-sizedefs.h"
68
69 #if defined(_WIN32)
70 #include <windows.h>
71 #ifdef _MSC_VER
72 #define inline /* __inline */
73 #endif
74 #endif /* WIN32 */
75
76 #if defined(__DJGPP__) && !defined(__DJGPP_MINOR__)
77 #include <stdio.h>
78 #endif /* __DJGPP__ */
79
80 #include "q_stdint.h"
81
82 /* TODO: note that SIZEOF_USIZE_T is never set to 8, although it should be for
83 * a 64bit platform. OTOH, may be that using 32bits is appropriate even on a
84 * 64bit platform because we allocate large arrays of these values. */
85 #if XD3_USE_LARGEFILE64
86 #define __USE_FILE_OFFSET64 1 /* GLIBC: for 64bit fileops, ... ? */
87 #ifndef _LARGEFILE_SOURCE
88 #define _LARGEFILE_SOURCE
89 #endif
90 #ifndef _FILE_OFFSET_BITS
91 #define _FILE_OFFSET_BITS 64
92 #endif
93
94 typedef uint64_t xoff_t;
95 #define SIZEOF_XOFF_T 8
96 #define SIZEOF_USIZE_T 4
97 #ifdef _WIN32
98 #define Q "I64"
99 #elif defined(__APPLE__) && defined(__MACH__)
100 #if ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__+0) > 1020)
101 #define Q "ll" /* darwin uint64_t is unsigned long long */
102 #else
103 #define Q "q" /* but "ll" requires > 10.2 */
104 #endif /* MAC OS X */
105 #elif (LONG_MAX > 2147483647L)
106 #define Q "l" /* rely on uint64_t being defined as unsigned long */
107 #else
108 #define Q "ll" /* rely on uint64_t being defined as unsigned long long */
109 #endif
110
111 #else /* !XD3_USE_LARGEFILE64: */
112
113 typedef uint32_t xoff_t;
114 #define SIZEOF_XOFF_T 4
115 #define SIZEOF_USIZE_T 4
116 #ifdef __DJGPP__
117 #define Q "l" /* djgpp uint32_t is unsigned long */
118 #else
119 #define Q /* rely on uint32_t being defined as unsigned int */
120 #endif
121 #endif /* !XD3_USE_LARGEFILE64 */
122
123 #define USE_UINT32 (SIZEOF_USIZE_T == 4 || SIZEOF_XOFF_T == 4)
124 #define USE_UINT64 (SIZEOF_USIZE_T == 8 || SIZEOF_XOFF_T == 8)
125
126 /* TODO: probably should do something better here. */
127 #ifndef UNALIGNED_OK
128 #if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
129 defined(__i686__) || defined(_X86_) || defined(__x86_64__)
130 #define UNALIGNED_OK 1
131 #else
132 #define UNALIGNED_OK 0
133 #endif
134 #endif
135
136 /**********************************************************************/
137
138 /* The code returned when main() fails, also defined in system
139 includes. */
140 #ifndef EXIT_FAILURE
141 #define EXIT_FAILURE 1
142 #endif
143
144 /* XD3_DEBUG=1 enables assertions and various statistics. Levels > 1
145 * enable some additional output only useful during development and
146 * debugging. */
147 #ifndef XD3_DEBUG
148 #define XD3_DEBUG 0
149 #endif
150
151 /* There are three string matching functions supplied: one fast, one
152 * slow (default), and one soft-configurable. To disable any of
153 * these, use the following definitions. */
154 #ifndef XD3_BUILD_SLOW
155 #define XD3_BUILD_SLOW 1
156 #endif
157 #ifndef XD3_BUILD_FAST
158 #define XD3_BUILD_FAST 1
159 #endif
160 #ifndef XD3_BUILD_FASTER
161 #define XD3_BUILD_FASTER 1
162 #endif
163 #ifndef XD3_BUILD_FASTEST
164 #define XD3_BUILD_FASTEST 1
165 #endif
166 #ifndef XD3_BUILD_SOFT
167 #define XD3_BUILD_SOFT 1
168 #endif
169 #ifndef XD3_BUILD_DEFAULT
170 #define XD3_BUILD_DEFAULT 1
171 #endif
172
173 #if XD3_DEBUG
174 #include <stdio.h>
175 #endif
176
177 typedef struct _xd3_stream xd3_stream;
178 typedef struct _xd3_source xd3_source;
179 typedef struct _xd3_hash_cfg xd3_hash_cfg;
180 typedef struct _xd3_smatcher xd3_smatcher;
181 typedef struct _xd3_rinst xd3_rinst;
182 typedef struct _xd3_dinst xd3_dinst;
183 typedef struct _xd3_hinst xd3_hinst;
184 typedef struct _xd3_winst xd3_winst;
185 typedef struct _xd3_rpage xd3_rpage;
186 typedef struct _xd3_addr_cache xd3_addr_cache;
187 typedef struct _xd3_output xd3_output;
188 typedef struct _xd3_desect xd3_desect;
189 typedef struct _xd3_iopt_buflist xd3_iopt_buflist;
190 typedef struct _xd3_rlist xd3_rlist;
191 typedef struct _xd3_sec_type xd3_sec_type;
192 typedef struct _xd3_sec_cfg xd3_sec_cfg;
193 typedef struct _xd3_sec_stream xd3_sec_stream;
194 typedef struct _xd3_config xd3_config;
195 typedef struct _xd3_code_table_desc xd3_code_table_desc;
196 typedef struct _xd3_code_table_sizes xd3_code_table_sizes;
197 typedef struct _xd3_slist xd3_slist;
198 typedef struct _xd3_whole_state xd3_whole_state;
199 typedef struct _xd3_wininfo xd3_wininfo;
200
201 /* The stream configuration has three callbacks functions, all of
202 * which may be supplied with NULL values. If config->getblk is
203 * provided as NULL, the stream returns XD3_GETSRCBLK. */
204
205 typedef void* (xd3_alloc_func) (void *opaque,
206 usize_t items,
207 usize_t size);
208 typedef void (xd3_free_func) (void *opaque,
209 void *address);
210
211 typedef int (xd3_getblk_func) (xd3_stream *stream,
212 xd3_source *source,
213 xoff_t blkno);
214
215 /* These are internal functions to delay construction of encoding
216 * tables and support alternate code tables. See the comments & code
217 * enabled by GENERIC_ENCODE_TABLES. */
218
219 typedef const xd3_dinst* (xd3_code_table_func) (void);
220 typedef int (xd3_comp_table_func) (xd3_stream *stream,
221 const uint8_t **data,
222 usize_t *size);
223
224
225
226 #if XD3_DEBUG
227 #define XD3_ASSERT(x) \
228 do { if (! (x)) { DP(RINT "%s:%d: XD3 assertion failed: %s\n", __FILE__, __LINE__, #x); \
229 abort (); } } while (0)
230 #else
231 #define XD3_ASSERT(x) do{}while(0)
232 #endif /* XD3_DEBUG */
233
234 #define xd3_max(x,y) ((x) < (y) ? (y) : (x))
235 #define xd3_min(x,y) ((x) < (y) ? (x) : (y))
236
237 /****************************************************************
238 PUBLIC ENUMS
239 ******************************************************************/
240
241 /* These are the five ordinary status codes returned by the
242 * xd3_encode_input() and xd3_decode_input() state machines. */
243 typedef enum {
244
245 /* An application must be prepared to handle these five return
246 * values from either xd3_encode_input or xd3_decode_input, except
247 * in the case of no-source compression, in which case XD3_GETSRCBLK
248 * is never returned. More detailed comments for these are given in
249 * xd3_encode_input and xd3_decode_input comments, below. */
250 XD3_INPUT = -17703, /* need input */
251 XD3_OUTPUT = -17704, /* have output */
252 XD3_GETSRCBLK = -17705, /* need a block of source input (with no
253 * xd3_getblk function), a chance to do
254 * non-blocking read. */
255 XD3_GOTHEADER = -17706, /* (decode-only) after the initial VCDIFF &
256 first window header */
257 XD3_WINSTART = -17707, /* notification: returned before a window is
258 * processed, giving a chance to
259 * XD3_SKIP_WINDOW or not XD3_SKIP_EMIT that
260 * window. */
261 XD3_WINFINISH = -17708, /* notification: returned after
262 encode/decode & output for a window */
263 XD3_TOOFARBACK = -17709, /* (encoder only) may be returned by
264 getblk() if the block is too old */
265 XD3_INTERNAL = -17710, /* internal error */
266 XD3_INVALID = -17711, /* invalid config */
267 XD3_INVALID_INPUT = -17712, /* invalid input/decoder error */
268 XD3_NOSECOND = -17713, /* when secondary compression finds no
269 improvement. */
270 XD3_UNIMPLEMENTED = -17714, /* currently VCD_TARGET */
271 } xd3_rvalues;
272
273 /* special values in config->flags */
274 typedef enum
275 {
276 XD3_JUST_HDR = (1 << 1), /* used by VCDIFF tools, see
277 xdelta3-main.h. */
278 XD3_SKIP_WINDOW = (1 << 2), /* used by VCDIFF tools, see
279 xdelta3-main.h. */
280 XD3_SKIP_EMIT = (1 << 3), /* used by VCDIFF tools, see
281 xdelta3-main.h. */
282 XD3_FLUSH = (1 << 4), /* flush the stream buffer to
283 prepare for
284 xd3_stream_close(). */
285
286 XD3_SEC_DJW = (1 << 5), /* use DJW static huffman */
287 XD3_SEC_FGK = (1 << 6), /* use FGK adaptive huffman */
288 XD3_SEC_TYPE = (XD3_SEC_DJW | XD3_SEC_FGK),
289
290 XD3_SEC_NODATA = (1 << 7), /* disable secondary compression of
291 the data section. */
292 XD3_SEC_NOINST = (1 << 8), /* disable secondary compression of
293 the inst section. */
294 XD3_SEC_NOADDR = (1 << 9), /* disable secondary compression of
295 the addr section. */
296
297 XD3_SEC_NOALL = (XD3_SEC_NODATA | XD3_SEC_NOINST | XD3_SEC_NOADDR),
298
299 XD3_ADLER32 = (1 << 10), /* enable checksum computation in
300 the encoder. */
301 XD3_ADLER32_NOVER = (1 << 11), /* disable checksum verification in
302 the decoder. */
303
304 XD3_ALT_CODE_TABLE = (1 << 12), /* for testing th
305 e alternate code table encoding. */
306
307 XD3_NOCOMPRESS = (1 << 13), /* disable ordinary data
308 * compression feature, only search
309 * the source, not the target. */
310 XD3_BEGREEDY = (1 << 14), /* disable the "1.5-pass
311 * algorithm", instead use greedy
312 * matching. Greedy is off by
313 * default. */
314 XD3_ADLER32_RECODE = (1 << 15), /* used by "recode". */
315
316 /* 4 bits to set the compression level the same as the command-line
317 * setting -1 through -9 (-0 corresponds to the XD3_NOCOMPRESS flag,
318 * and is independent of compression level). This is for
319 * convenience, especially with xd3_encode_memory(). */
320
321 XD3_COMPLEVEL_SHIFT = 20, /* 20 - 24 */
322 XD3_COMPLEVEL_MASK = (0xF << XD3_COMPLEVEL_SHIFT),
323 XD3_COMPLEVEL_1 = (1 << XD3_COMPLEVEL_SHIFT),
324 XD3_COMPLEVEL_2 = (2 << XD3_COMPLEVEL_SHIFT),
325 XD3_COMPLEVEL_3 = (3 << XD3_COMPLEVEL_SHIFT),
326 XD3_COMPLEVEL_6 = (6 << XD3_COMPLEVEL_SHIFT),
327 XD3_COMPLEVEL_9 = (9 << XD3_COMPLEVEL_SHIFT),
328
329 } xd3_flags;
330
331 /* The values of this enumeration are set in xd3_config using the
332 * smatch_cfg variable. It can be set to default, slow, fast, etc.,
333 * and soft. */
334 typedef enum
335 {
336 XD3_SMATCH_DEFAULT = 0, /* Flags may contain XD3_COMPLEVEL bits,
337 else default. */
338 XD3_SMATCH_SLOW = 1,
339 XD3_SMATCH_FAST = 2,
340 XD3_SMATCH_FASTER = 3,
341 XD3_SMATCH_FASTEST = 4,
342 XD3_SMATCH_SOFT = 5,
343 } xd3_smatch_cfg;
344
345 /*********************************************************************
346 PRIVATE ENUMS
347 **********************************************************************/
348
349 /* stream->match_state is part of the xd3_encode_input state machine
350 * for source matching:
351 *
352 * 1. the XD3_GETSRCBLK block-read mechanism means reentrant matching
353 * 2. this state spans encoder windows: a match and end-of-window
354 * will continue in the next 3. the initial target byte and source
355 * byte are a presumed match, to avoid some computation in case the
356 * inputs are identical.
357 */
358 typedef enum {
359
360 MATCH_TARGET = 0, /* in this state, attempt to match the start of
361 * the target with the previously set source
362 * address (initially 0). */
363 MATCH_BACKWARD = 1, /* currently expanding a match backward in the
364 source/target. */
365 MATCH_FORWARD = 2, /* currently expanding a match forward in the
366 source/target. */
367 MATCH_SEARCHING = 3, /* currently searching for a match. */
368
369 } xd3_match_state;
370
371 /* The xd3_encode_input state machine steps through these states in
372 * the following order. The matcher is reentrant and returns
373 * XD3_INPUT whenever it requires more data. After receiving
374 * XD3_INPUT, if the application reads EOF it should call
375 * xd3_stream_close().
376 */
377 typedef enum {
378
379 ENC_INIT = 0, /* xd3_encode_input has never been called. */
380 ENC_INPUT = 1, /* waiting for xd3_avail_input () to be called. */
381 ENC_SEARCH = 2, /* currently searching for matches. */
382 ENC_INSTR = 3, /* currently formatting output. */
383 ENC_FLUSH = 4, /* currently emitting output. */
384 ENC_POSTOUT = 5, /* after an output section. */
385 ENC_POSTWIN = 6, /* after all output sections. */
386 ENC_ABORTED = 7, /* abort. */
387 } xd3_encode_state;
388
389 /* The xd3_decode_input state machine steps through these states in
390 * the following order. The matcher is reentrant and returns
391 * XD3_INPUT whenever it requires more data. After receiving
392 * XD3_INPUT, if the application reads EOF it should call
393 * xd3_stream_close().
394 *
395 * 0-8: the VCDIFF header
396 * 9-18: the VCDIFF window header
397 * 19-21: the three primary sections: data, inst, addr
398 * 22: producing output: returns XD3_OUTPUT, possibly XD3_GETSRCBLK,
399 * 23: return XD3_WINFINISH, set state=9 to decode more input
400 */
401 typedef enum {
402
403 DEC_VCHEAD = 0, /* VCDIFF header */
404 DEC_HDRIND = 1, /* header indicator */
405
406 DEC_SECONDID = 2, /* secondary compressor ID */
407
408 DEC_TABLEN = 3, /* code table length */
409 DEC_NEAR = 4, /* code table near */
410 DEC_SAME = 5, /* code table same */
411 DEC_TABDAT = 6, /* code table data */
412
413 DEC_APPLEN = 7, /* application data length */
414 DEC_APPDAT = 8, /* application data */
415
416 DEC_WININD = 9, /* window indicator */
417
418 DEC_CPYLEN = 10, /* copy window length */
419 DEC_CPYOFF = 11, /* copy window offset */
420
421 DEC_ENCLEN = 12, /* length of delta encoding */
422 DEC_TGTLEN = 13, /* length of target window */
423 DEC_DELIND = 14, /* delta indicator */
424
425 DEC_DATALEN = 15, /* length of ADD+RUN data */
426 DEC_INSTLEN = 16, /* length of instruction data */
427 DEC_ADDRLEN = 17, /* length of address data */
428
429 DEC_CKSUM = 18, /* window checksum */
430
431 DEC_DATA = 19, /* data section */
432 DEC_INST = 20, /* instruction section */
433 DEC_ADDR = 21, /* address section */
434
435 DEC_EMIT = 22, /* producing data */
436
437 DEC_FINISH = 23, /* window finished */
438
439 DEC_ABORTED = 24, /* xd3_abort_stream */
440 } xd3_decode_state;
441
442 /************************************************************
443 internal types
444 ************************************************************/
445
446 /* instruction lists used in the IOPT buffer */
447 struct _xd3_rlist
448 {
449 xd3_rlist *next;
450 xd3_rlist *prev;
451 };
452
453 /* the raw encoding of an instruction used in the IOPT buffer */
454 struct _xd3_rinst
455 {
456 uint8_t type;
457 uint8_t xtra;
458 uint8_t code1;
459 uint8_t code2;
460 usize_t pos;
461 usize_t size;
462 xoff_t addr;
463 xd3_rlist link;
464 };
465
466 /* the code-table form of an single- or double-instruction */
467 struct _xd3_dinst
468 {
469 uint8_t type1;
470 uint8_t size1;
471 uint8_t type2;
472 uint8_t size2;
473 };
474
475 /* the decoded form of a single (half) instruction. */
476 struct _xd3_hinst
477 {
478 uint8_t type;
479 uint32_t size; /* TODO: why decode breaks if this is usize_t? */
480 uint32_t addr; /* TODO: why decode breaks if this is usize_t? */
481 };
482
483 /* the form of a whole-file instruction */
484 struct _xd3_winst
485 {
486 uint8_t type; /* RUN, ADD, COPY */
487 uint8_t mode; /* 0, VCD_SOURCE, VCD_TARGET */
488 usize_t size;
489 xoff_t addr;
490 xoff_t position; /* absolute position of this inst */
491 };
492
493 /* used by the encoder to buffer output in sections. list of blocks. */
494 struct _xd3_output
495 {
496 uint8_t *base;
497 usize_t next;
498 usize_t avail;
499 xd3_output *next_page;
500 };
501
502 /* used by the decoder to buffer input in sections. */
503 struct _xd3_desect
504 {
505 const uint8_t *buf;
506 const uint8_t *buf_max;
507 uint32_t size; /* TODO: why decode breaks if this is usize_t? */
508 usize_t pos;
509
510 /* used in xdelta3-decode.h */
511 uint8_t *copied1;
512 usize_t alloc1;
513
514 /* used in xdelta3-second.h */
515 uint8_t *copied2;
516 usize_t alloc2;
517 };
518
519 /* the VCDIFF address cache, see the RFC */
520 struct _xd3_addr_cache
521 {
522 usize_t s_near;
523 usize_t s_same;
524 usize_t next_slot; /* the circular index for near */
525 usize_t *near_array; /* array of size s_near */
526 usize_t *same_array; /* array of size s_same*256 */
527 };
528
529 /* the IOPT buffer list is just a list of buffers, which may be allocated
530 * during encode when using an unlimited buffer. */
531 struct _xd3_iopt_buflist
532 {
533 xd3_rinst *buffer;
534 xd3_iopt_buflist *next;
535 };
536
537 /* This is the record of a pre-compiled configuration, a subset of
538 xd3_config. */
539 struct _xd3_smatcher
540 {
541 const char *name;
542 int (*string_match) (xd3_stream *stream);
543 usize_t large_look;
544 usize_t large_step;
545 usize_t small_look;
546 usize_t small_chain;
547 usize_t small_lchain;
548 usize_t max_lazy;
549 usize_t long_enough;
550 };
551
552 /* hash table size & power-of-two hash function. */
553 struct _xd3_hash_cfg
554 {
555 usize_t size;
556 usize_t shift;
557 usize_t mask;
558 };
559
560 /* the sprev list */
561 struct _xd3_slist
562 {
563 usize_t last_pos;
564 };
565
566 /* window info (for whole state) */
567 struct _xd3_wininfo {
568 xoff_t offset;
569 usize_t length;
570 uint32_t adler32;
571 };
572
573 /* whole state for, e.g., merge */
574 struct _xd3_whole_state {
575 usize_t addslen;
576 uint8_t *adds;
577 usize_t adds_alloc;
578
579 usize_t instlen;
580 xd3_winst *inst;
581 usize_t inst_alloc;
582
583 usize_t wininfolen;
584 xd3_wininfo *wininfo;
585 usize_t wininfo_alloc;
586
587 xoff_t length;
588 };
589
590 /********************************************************************
591 public types
592 *******************************************************************/
593
594 /* Settings for the secondary compressor. */
595 struct _xd3_sec_cfg
596 {
597 int data_type; /* Which section. (set automatically) */
598 usize_t ngroups; /* Number of DJW Huffman groups. */
599 usize_t sector_size; /* Sector size. */
600 int inefficient; /* If true, ignore efficiency check [avoid XD3_NOSECOND]. */
601 };
602
603 /* This is the user-visible stream configuration. */
604 struct _xd3_config
605 {
606 usize_t winsize; /* The encoder window size. */
607 usize_t sprevsz; /* How far back small string
608 matching goes */
609 usize_t iopt_size; /* entries in the
610 instruction-optimizing
611 buffer */
612 usize_t srcwin_maxsz; /* srcwin_size grows by a factor
613 of 2 when no matches are
614 found. encoder will not seek
615 back further than this. */
616
617 xd3_getblk_func *getblk; /* The three callbacks. */
618 xd3_alloc_func *alloc;
619 xd3_free_func *freef;
620 void *opaque; /* Not used. */
621 int flags; /* stream->flags are initialized
622 * from xd3_config & never
623 * modified by the library. Use
624 * xd3_set_flags to modify flags
625 * settings mid-stream. */
626
627 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
628 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
629 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
630
631 xd3_smatch_cfg smatch_cfg; /* See enum: use fields below for
632 soft config */
633 xd3_smatcher smatcher_soft;
634 };
635
636 /* The primary source file object. You create one of these objects and
637 * initialize the first four fields. This library maintains the next
638 * 5 fields. The configured getblk implementation is responsible for
639 * setting the final 3 fields when called (and/or when XD3_GETSRCBLK
640 * is returned).
641 */
642 struct _xd3_source
643 {
644 /* you set */
645 usize_t blksize; /* block size */
646 const char *name; /* its name, for debug/print
647 purposes */
648 void *ioh; /* opaque handle */
649
650 /* getblk sets */
651 xoff_t curblkno; /* current block number: client
652 sets after getblk request */
653 usize_t onblk; /* number of bytes on current
654 block: client sets, must be >= 0
655 and <= blksize */
656 const uint8_t *curblk; /* current block array: client
657 sets after getblk request */
658
659 /* xd3 sets */
660 usize_t srclen; /* length of this source window */
661 xoff_t srcbase; /* offset of this source window
662 in the source itself */
663 int shiftby; /* for power-of-two blocksizes */
664 int maskby; /* for power-of-two blocksizes */
665 xoff_t cpyoff_blocks; /* offset of dec_cpyoff in blocks */
666 usize_t cpyoff_blkoff; /* offset of copy window in
667 blocks, remainder */
668 xoff_t getblkno; /* request block number: xd3 sets
669 current getblk request */
670
671 /* See xd3_getblk() */
672 xoff_t max_blkno; /* Maximum block, if eof is known,
673 * otherwise, equals frontier_blkno
674 * (initially 0). */
675 xoff_t frontier_blkno; /* If eof is unknown, the next
676 * source position to be read.
677 * Otherwise, equal to
678 * max_blkno. */
679 usize_t onlastblk; /* Number of bytes on max_blkno */
680 int eof_known; /* Set to true when the first
681 * partial block is read. */
682 };
683
684 /* The primary xd3_stream object, used for encoding and decoding. You
685 * may access only two fields: avail_out, next_out. Use the methods
686 * above to operate on xd3_stream. */
687 struct _xd3_stream
688 {
689 /* input state */
690 const uint8_t *next_in; /* next input byte */
691 usize_t avail_in; /* number of bytes available at
692 next_in */
693 xoff_t total_in; /* how many bytes in */
694
695 /* output state */
696 uint8_t *next_out; /* next output byte */
697 usize_t avail_out; /* number of bytes available at
698 next_out */
699 usize_t space_out; /* total out space */
700 xoff_t current_window; /* number of windows encoded/decoded */
701 xoff_t total_out; /* how many bytes out */
702
703 /* to indicate an error, xd3 sets */
704 const char *msg; /* last error message, NULL if
705 no error */
706
707 /* source configuration */
708 xd3_source *src; /* source array */
709
710 /* encoder memory configuration */
711 usize_t winsize; /* suggested window size */
712 usize_t sprevsz; /* small string, previous window
713 size (power of 2) */
714 usize_t sprevmask; /* small string, previous window
715 size mask */
716 usize_t iopt_size;
717 usize_t iopt_unlimited;
718 usize_t srcwin_maxsz;
719
720 /* general configuration */
721 xd3_getblk_func *getblk; /* set nxtblk, nxtblkno to scanblkno */
722 xd3_alloc_func *alloc; /* malloc function */
723 xd3_free_func *free; /* free function */
724 void* opaque; /* private data object passed to
725 alloc, free, and getblk */
726 int flags; /* various options */
727
728 /* secondary compressor configuration */
729 xd3_sec_cfg sec_data; /* Secondary compressor config: data */
730 xd3_sec_cfg sec_inst; /* Secondary compressor config: inst */
731 xd3_sec_cfg sec_addr; /* Secondary compressor config: addr */
732
733 xd3_smatcher smatcher;
734
735 usize_t *large_table; /* table of large checksums */
736 xd3_hash_cfg large_hash; /* large hash config */
737
738 usize_t *small_table; /* table of small checksums */
739 xd3_slist *small_prev; /* table of previous offsets,
740 circular linked list */
741 int small_reset; /* true if small table should
742 be reset */
743
744 xd3_hash_cfg small_hash; /* small hash config */
745 xd3_addr_cache acache; /* the vcdiff address cache */
746 xd3_encode_state enc_state; /* state of the encoder */
747
748 usize_t taroff; /* base offset of the target input */
749 usize_t input_position; /* current input position */
750 usize_t min_match; /* current minimum match
751 length, avoids redundent
752 matches */
753 usize_t unencoded_offset; /* current input, first
754 * unencoded offset. this value
755 * is <= the first instruction's
756 * position in the iopt buffer,
757 * if there is at least one
758 * match in the buffer. */
759
760 // SRCWIN
761 // these variables plus srcwin_maxsz above (set by config)
762 int srcwin_decided; /* boolean: true if srclen and
763 srcbase have been
764 decided. */
765 int srcwin_decided_early; /* boolean: true if srclen
766 and srcbase were
767 decided early. */
768 xoff_t srcwin_cksum_pos; /* Source checksum position */
769
770 // MATCH
771 xd3_match_state match_state; /* encoder match state */
772 xoff_t match_srcpos; /* current match source
773 position relative to
774 srcbase */
775 xoff_t match_last_srcpos; /* previously attempted
776 * srcpos, to avoid loops. */
777 xoff_t match_minaddr; /* smallest matching address to
778 * set window params (reset each
779 * window xd3_encode_reset) */
780 xoff_t match_maxaddr; /* largest matching address to
781 * set window params (reset each
782 * window xd3_encode_reset) */
783 usize_t match_back; /* match extends back so far */
784 usize_t match_maxback; /* match extends back maximum */
785 usize_t match_fwd; /* match extends forward so far */
786 usize_t match_maxfwd; /* match extends forward maximum */
787
788 xoff_t maxsrcaddr; /* address of the last source
789 match (across windows) */
790
791 uint8_t *buf_in; /* for saving buffered input */
792 usize_t buf_avail; /* amount of saved input */
793 const uint8_t *buf_leftover; /* leftover content of next_in
794 (i.e., user's buffer) */
795 usize_t buf_leftavail; /* amount of leftover content */
796
797 xd3_output *enc_current; /* current output buffer */
798 xd3_output *enc_free; /* free output buffers */
799 xd3_output *enc_heads[4]; /* array of encoded outputs:
800 head of chain */
801 xd3_output *enc_tails[4]; /* array of encoded outputs:
802 tail of chain */
803 uint32_t recode_adler32; /* set the adler32 checksum
804 * during "recode". */
805
806 xd3_rlist iopt_used; /* instruction optimizing buffer */
807 xd3_rlist iopt_free;
808 xd3_rinst *iout; /* next single instruction */
809 xd3_iopt_buflist *iopt_alloc;
810
811 const uint8_t *enc_appheader; /* application header to encode */
812 usize_t enc_appheadsz; /* application header size */
813
814 /* decoder stuff */
815 xd3_decode_state dec_state; /* current DEC_XXX value */
816 usize_t dec_hdr_ind; /* VCDIFF header indicator */
817 usize_t dec_win_ind; /* VCDIFF window indicator */
818 usize_t dec_del_ind; /* VCDIFF delta indicator */
819
820 uint8_t dec_magic[4]; /* First four bytes */
821 usize_t dec_magicbytes; /* Magic position. */
822
823 usize_t dec_secondid; /* Optional secondary compressor ID. */
824
825 /* TODO: why decode breaks if this is usize_t? */
826 uint32_t dec_codetblsz; /* Optional code table: length. */
827 uint8_t *dec_codetbl; /* Optional code table: storage. */
828 usize_t dec_codetblbytes; /* Optional code table: position. */
829
830 /* TODO: why decode breaks if this is usize_t? */
831 uint32_t dec_appheadsz; /* Optional application header:
832 size. */
833 uint8_t *dec_appheader; /* Optional application header:
834 storage */
835 usize_t dec_appheadbytes; /* Optional application header:
836 position. */
837
838 usize_t dec_cksumbytes; /* Optional checksum: position. */
839 uint8_t dec_cksum[4]; /* Optional checksum: storage. */
840 uint32_t dec_adler32; /* Optional checksum: value. */
841
842 /* TODO: why decode breaks if this is usize_t? */
843 uint32_t dec_cpylen; /* length of copy window
844 (VCD_SOURCE or VCD_TARGET) */
845 xoff_t dec_cpyoff; /* offset of copy window
846 (VCD_SOURCE or VCD_TARGET) */
847 /* TODO: why decode breaks if this is usize_t? */
848 uint32_t dec_enclen; /* length of delta encoding */
849 /* TODO: why decode breaks if this is usize_t? */
850 uint32_t dec_tgtlen; /* length of target window */
851
852 #if USE_UINT64
853 uint64_t dec_64part; /* part of a decoded uint64_t */
854 #endif
855 #if USE_UINT32
856 uint32_t dec_32part; /* part of a decoded uint32_t */
857 #endif
858
859 xoff_t dec_winstart; /* offset of the start of
860 current target window */
861 xoff_t dec_window_count; /* == current_window + 1 in
862 DEC_FINISH */
863 usize_t dec_winbytes; /* bytes of the three sections
864 so far consumed */
865 usize_t dec_hdrsize; /* VCDIFF + app header size */
866
867 const uint8_t *dec_tgtaddrbase; /* Base of decoded target
868 addresses (addr >=
869 dec_cpylen). */
870 const uint8_t *dec_cpyaddrbase; /* Base of decoded copy
871 addresses (addr <
872 dec_cpylen). */
873
874 usize_t dec_position; /* current decoder position
875 counting the cpylen
876 offset */
877 usize_t dec_maxpos; /* maximum decoder position
878 counting the cpylen
879 offset */
880 xd3_hinst dec_current1; /* current instruction */
881 xd3_hinst dec_current2; /* current instruction */
882
883 uint8_t *dec_buffer; /* Decode buffer */
884 uint8_t *dec_lastwin; /* In case of VCD_TARGET, the
885 last target window. */
886 usize_t dec_lastlen; /* length of the last target
887 window */
888 xoff_t dec_laststart; /* offset of the start of last
889 target window */
890 usize_t dec_lastspace; /* allocated space of last
891 target window, for reuse */
892
893 xd3_desect inst_sect; /* staging area for decoding
894 window sections */
895 xd3_desect addr_sect;
896 xd3_desect data_sect;
897
898 xd3_code_table_func *code_table_func;
899 xd3_comp_table_func *comp_table_func;
900 const xd3_dinst *code_table;
901 const xd3_code_table_desc *code_table_desc;
902 xd3_dinst *code_table_alloc;
903
904 /* secondary compression */
905 const xd3_sec_type *sec_type;
906 xd3_sec_stream *sec_stream_d;
907 xd3_sec_stream *sec_stream_i;
908 xd3_sec_stream *sec_stream_a;
909
910 /* state for reconstructing whole files (e.g., for merge), this only
911 * supports loading USIZE_T_MAX instructions, adds, etc. */
912 xd3_whole_state whole_target;
913
914 /* statistics */
915 xoff_t n_scpy;
916 xoff_t n_tcpy;
917 xoff_t n_add;
918 xoff_t n_run;
919
920 xoff_t l_scpy;
921 xoff_t l_tcpy;
922 xoff_t l_add;
923 xoff_t l_run;
924
925 usize_t i_slots_used;
926 };
927
928 /**************************************************************************
929 PUBLIC FUNCTIONS
930 **************************************************************************/
931
932 /* This function configures an xd3_stream using the provided in-memory
933 * input buffer, source buffer, output buffer, and flags. The output
934 * array must be large enough or else ENOSPC will be returned. This
935 * is the simplest in-memory encoding interface. */
936 int xd3_encode_memory (const uint8_t *input,
937 usize_t input_size,
938 const uint8_t *source,
939 usize_t source_size,
940 uint8_t *output_buffer,
941 usize_t *output_size,
942 usize_t avail_output,
943 int flags);
944
945 /* The reverse of xd3_encode_memory. */
946 int xd3_decode_memory (const uint8_t *input,
947 usize_t input_size,
948 const uint8_t *source,
949 usize_t source_size,
950 uint8_t *output_buf,
951 usize_t *output_size,
952 usize_t avail_output,
953 int flags);
954
955 /* This function encodes an in-memory input using a pre-configured
956 * xd3_stream. This allows the caller to set a variety of options
957 * which are not available in the xd3_encode/decode_memory()
958 * functions.
959 *
960 * The output array must be large enough to hold the output or else
961 * ENOSPC is returned. The source (if any) should be set using
962 * xd3_set_source_and_size() with a single-block xd3_source. This
963 * calls the underlying non-blocking interfaces,
964 * xd3_encode/decode_input(), handling the necessary input/output
965 * states. This method may be considered a reference for any
966 * application using xd3_encode_input() directly.
967 *
968 * xd3_stream stream;
969 * xd3_config config;
970 * xd3_source src;
971 *
972 * memset (& src, 0, sizeof (src));
973 * memset (& stream, 0, sizeof (stream));
974 * memset (& config, 0, sizeof (config));
975 *
976 * if (source != NULL)
977 * {
978 * src.size = source_size;
979 * src.blksize = source_size;
980 * src.curblkno = 0;
981 * src.onblk = source_size;
982 * src.curblk = source;
983 * xd3_set_source(&stream, &src);
984 * }
985 *
986 * config.flags = flags;
987 * config.srcwin_maxsz = source_size;
988 * config.winsize = input_size;
989 *
990 * ... set smatcher, appheader, encoding-table, compression-level, etc.
991 *
992 * xd3_config_stream(&stream, &config);
993 * xd3_encode_stream(&stream, ...);
994 * xd3_free_stream(&stream);
995 */
996 int xd3_encode_stream (xd3_stream *stream,
997 const uint8_t *input,
998 usize_t input_size,
999 uint8_t *output,
1000 usize_t *output_size,
1001 usize_t avail_output);
1002
1003 /* The reverse of xd3_encode_stream. */
1004 int xd3_decode_stream (xd3_stream *stream,
1005 const uint8_t *input,
1006 usize_t input_size,
1007 uint8_t *output,
1008 usize_t *output_size,
1009 usize_t avail_size);
1010
1011 /* This is the non-blocking interface.
1012 *
1013 * Handling input and output states is the same for encoding or
1014 * decoding using the xd3_avail_input() and xd3_consume_output()
1015 * routines, inlined below.
1016 *
1017 * Return values:
1018 *
1019 * XD3_INPUT: the process requires more input: call
1020 * xd3_avail_input() then repeat
1021 *
1022 * XD3_OUTPUT: the process has more output: read stream->next_out,
1023 * stream->avail_out, then call xd3_consume_output(),
1024 * then repeat
1025 *
1026 * XD3_GOTHEADER: (decoder-only) notification returned following the
1027 * VCDIFF header and first window header. the decoder
1028 * may use the header to configure itself.
1029 *
1030 * XD3_WINSTART: a general notification returned once for each
1031 * window except the 0-th window, which is implied by
1032 * XD3_GOTHEADER. It is recommended to use a
1033 * switch-stmt such as:
1034 *
1035 * ...
1036 * again:
1037 * switch ((ret = xd3_decode_input (stream))) {
1038 * case XD3_GOTHEADER: {
1039 * assert(stream->current_window == 0);
1040 * stuff;
1041 * }
1042 * // fallthrough
1043 * case XD3_WINSTART: {
1044 * something(stream->current_window);
1045 * goto again;
1046 * }
1047 * ...
1048 *
1049 * XD3_WINFINISH: a general notification, following the complete
1050 * input & output of a window. at this point,
1051 * stream->total_in and stream->total_out are consistent
1052 * for either encoding or decoding.
1053 *
1054 * XD3_GETSRCBLK: If the xd3_getblk() callback is NULL, this value
1055 * is returned to initiate a non-blocking source read.
1056 */
1057 int xd3_decode_input (xd3_stream *stream);
1058 int xd3_encode_input (xd3_stream *stream);
1059
1060 /* The xd3_config structure is used to initialize a stream - all data
1061 * is copied into stream so config may be a temporary variable. See
1062 * the [documentation] or comments on the xd3_config structure. */
1063 int xd3_config_stream (xd3_stream *stream,
1064 xd3_config *config);
1065
1066 /* Since Xdelta3 doesn't open any files, xd3_close_stream is just an
1067 * error check that the stream is in a proper state to be closed: this
1068 * means the encoder is flushed and the decoder is at a window
1069 * boundary. The application is responsible for freeing any of the
1070 * resources it supplied. */
1071 int xd3_close_stream (xd3_stream *stream);
1072
1073 /* This arranges for closes the stream to succeed. Does not free the
1074 * stream.*/
1075 void xd3_abort_stream (xd3_stream *stream);
1076
1077 /* xd3_free_stream frees all memory allocated for the stream. The
1078 * application is responsible for freeing any of the resources it
1079 * supplied. */
1080 void xd3_free_stream (xd3_stream *stream);
1081
1082 /* This function informs the encoder or decoder that source matching
1083 * (i.e., delta-compression) is possible. For encoding, this should
1084 * be called before the first xd3_encode_input. A NULL source is
1085 * ignored. For decoding, this should be called before the first
1086 * window is decoded, but the appheader may be read first
1087 * (XD3_GOTHEADER). After decoding the header, call xd3_set_source()
1088 * if you have a source file. Note: if (stream->dec_win_ind & VCD_SOURCE)
1089 * is true, it means the first window expects there to be a source file.
1090 */
1091 int xd3_set_source (xd3_stream *stream,
1092 xd3_source *source);
1093
1094 /* If the source size is known, call this instead of xd3_set_source().
1095 * to avoid having stream->getblk called (and/or to avoid XD3_GETSRCBLK).
1096 *
1097 * Follow these steps:
1098 xd3_source source;
1099 memset(&source, 0, sizeof(source));
1100 source.blksize = size;
1101 source.onblk = size;
1102 source.curblk = buf;
1103 source.curblkno = 0;
1104 int ret = xd3_set_source_and_size(&stream, &source, size);
1105 ...
1106 */
1107 int xd3_set_source_and_size (xd3_stream *stream,
1108 xd3_source *source,
1109 xoff_t source_size);
1110
1111 /* This should be called before the first call to xd3_encode_input()
1112 * to include application-specific data in the VCDIFF header. */
1113 void xd3_set_appheader (xd3_stream *stream,
1114 const uint8_t *data,
1115 usize_t size);
1116
1117 /* xd3_get_appheader may be called in the decoder after XD3_GOTHEADER.
1118 * For convenience, the decoder always adds a single byte padding to
1119 * the end of the application header, which is set to zero in case the
1120 * application header is a string. */
1121 int xd3_get_appheader (xd3_stream *stream,
1122 uint8_t **data,
1123 usize_t *size);
1124
1125 /* To generate a VCDIFF encoded delta with xd3_encode_init() from
1126 * another format, use:
1127 *
1128 * xd3_encode_init_partial() -- initialze encoder state (w/o hash tables)
1129 * xd3_init_cache() -- reset VCDIFF address cache
1130 * xd3_found_match() -- to report a copy instruction
1131 *
1132 * set stream->enc_state to ENC_INSTR and call xd3_encode_input as usual.
1133 */
1134 int xd3_encode_init_partial (xd3_stream *stream);
1135 void xd3_init_cache (xd3_addr_cache* acache);
1136 int xd3_found_match (xd3_stream *stream,
1137 usize_t pos, usize_t size,
1138 xoff_t addr, int is_source);
1139
1140 /* Gives an error string for xdelta3-speficic errors, returns NULL for
1141 system errors */
1142 const char* xd3_strerror (int ret);
1143
1144 /* For convenience, zero & initialize the xd3_config structure with
1145 specified flags. */
1146 static inline
xd3_init_config(xd3_config * config,int flags)1147 void xd3_init_config (xd3_config *config,
1148 int flags)
1149 {
1150 memset (config, 0, sizeof (*config));
1151 config->flags = flags;
1152 }
1153
1154 /* This supplies some input to the stream.
1155 *
1156 * For encoding, if the input is larger than the configured window
1157 * size (xd3_config.winsize), the entire input will be consumed and
1158 * encoded anyway. If you wish to strictly limit the window size,
1159 * limit the buffer passed to xd3_avail_input to the window size.
1160 *
1161 * For encoding, if the input is smaller than the configured window
1162 * size (xd3_config.winsize), the library will create a window-sized
1163 * buffer and accumulate input until a full-sized window can be
1164 * encoded. XD3_INPUT will be returned. The input must remain valid
1165 * until the next time xd3_encode_input() returns XD3_INPUT.
1166 *
1167 * For decoding, the input will be consumed entirely before XD3_INPUT
1168 * is returned again.
1169 */
1170 static inline
xd3_avail_input(xd3_stream * stream,const uint8_t * idata,usize_t isize)1171 void xd3_avail_input (xd3_stream *stream,
1172 const uint8_t *idata,
1173 usize_t isize)
1174 {
1175 /* Even if isize is zero, the code expects a non-NULL idata. Why?
1176 * It uses this value to determine whether xd3_avail_input has ever
1177 * been called. If xd3_encode_input is called before
1178 * xd3_avail_input it will return XD3_INPUT right away without
1179 * allocating a stream->winsize buffer. This is to avoid an
1180 * unwanted allocation. */
1181 XD3_ASSERT (idata != NULL || isize == 0);
1182
1183 stream->next_in = idata;
1184 stream->avail_in = isize;
1185 }
1186
1187 /* This acknowledges receipt of output data, must be called after any
1188 * XD3_OUTPUT return. */
1189 static inline
xd3_consume_output(xd3_stream * stream)1190 void xd3_consume_output (xd3_stream *stream)
1191 {
1192 stream->avail_out = 0;
1193 }
1194
1195 /* These are set for each XD3_WINFINISH return. */
1196 static inline
xd3_encoder_used_source(xd3_stream * stream)1197 int xd3_encoder_used_source (xd3_stream *stream) {
1198 return stream->src != NULL && stream->src->srclen > 0;
1199 }
1200 static inline
xd3_encoder_srcbase(xd3_stream * stream)1201 xoff_t xd3_encoder_srcbase (xd3_stream *stream) {
1202 return stream->src->srcbase;
1203 }
1204 static inline
xd3_encoder_srclen(xd3_stream * stream)1205 usize_t xd3_encoder_srclen (xd3_stream *stream) {
1206 return stream->src->srclen;
1207 }
1208
1209 /* Checks for legal flag changes. */
1210 static inline
xd3_set_flags(xd3_stream * stream,int flags)1211 void xd3_set_flags (xd3_stream *stream, int flags)
1212 {
1213 /* The bitwise difference should contain only XD3_FLUSH or
1214 XD3_SKIP_WINDOW */
1215 XD3_ASSERT(((flags ^ stream->flags) & ~(XD3_FLUSH | XD3_SKIP_WINDOW)) == 0);
1216 stream->flags = flags;
1217 }
1218
1219 /* Gives some extra information about the latest library error, if any
1220 * is known. */
1221 static inline
xd3_errstring(xd3_stream * stream)1222 const char* xd3_errstring (xd3_stream *stream)
1223 {
1224 return stream->msg ? stream->msg : "";
1225 }
1226
1227
1228 /* 64-bit divisions are expensive, which is why we require a
1229 * power-of-two source->blksize. To relax this restriction is
1230 * relatively easy, see the history for xd3_blksize_div(). */
1231 static inline
xd3_blksize_div(const xoff_t offset,const xd3_source * source,xoff_t * blkno,usize_t * blkoff)1232 void xd3_blksize_div (const xoff_t offset,
1233 const xd3_source *source,
1234 xoff_t *blkno,
1235 usize_t *blkoff) {
1236 *blkno = (xoff_t) (offset >> source->shiftby);
1237 *blkoff = (usize_t) (offset & source->maskby);
1238 XD3_ASSERT (*blkoff < source->blksize);
1239 }
1240
1241 static inline
xd3_blksize_add(xoff_t * blkno,usize_t * blkoff,const xd3_source * source,const usize_t add)1242 void xd3_blksize_add (xoff_t *blkno,
1243 usize_t *blkoff,
1244 const xd3_source *source,
1245 const usize_t add)
1246 {
1247 usize_t blkdiff;
1248
1249 /* Does not check for overflow, checked in xdelta3-decode.h. */
1250 *blkoff += add;
1251 blkdiff = *blkoff >> source->shiftby;
1252
1253 if (blkdiff)
1254 {
1255 *blkno += blkdiff;
1256 *blkoff &= source->maskby;
1257 }
1258
1259 XD3_ASSERT (*blkoff < source->blksize);
1260 }
1261
1262 #endif /* _XDELTA3_H_ */
1263