1 /*
2    Copyright (C) 2006-2016 Con Kolivas
3    Copyright (C) 2011 Peter Hyman
4    Copyright (C) 1998-2003 Andrew Tridgell
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19 
20 #ifndef LRZIP_PRIV_H
21 #define LRZIP_PRIV_H
22 
23 #include "config.h"
24 
/* Stream-related constants; STREAM_BUFSIZE evaluates to 10 * 1024 * 1024
 * bytes (10 MiB). */
#define NUM_STREAMS 2
#define STREAM_BUFSIZE (1024 * 1024 * 10)
27 
28 #include <stdlib.h>
29 #include <stdint.h>
30 #include <stdio.h>
31 #include <stdbool.h>
32 #include <stdarg.h>
33 #include <semaphore.h>
34 
35 #ifdef HAVE_PTHREAD_H
36 # include <pthread.h>
37 #endif
38 
39 #ifdef HAVE_STRING_H
40 # include <string.h>
41 #endif
42 
43 #ifdef HAVE_MALLOC_H
44 # include <malloc.h>
45 #endif
46 
47 #ifdef HAVE_ALLOCA_H
48 # include <alloca.h>
49 #elif defined __GNUC__
50 # include <stdlib.h>
51 #elif defined _AIX
52 # define alloca __alloca
53 #elif defined _MSC_VER
54 # include <malloc.h>
55 # define alloca _alloca
56 #else
57 # include <stddef.h>
58 # ifdef  __cplusplus
59 extern "C"
60 # endif
61 void *alloca (size_t);
62 #endif
63 
/* Pull in the platform byte-order header when configure detected one. */
#ifdef HAVE_ENDIAN_H
# include <endian.h>
#elif HAVE_SYS_ENDIAN_H
# include <sys/endian.h>
#endif
/* Fallback: when no header supplied __BYTE_ORDER, derive it from the
 * WORDS_BIGENDIAN configure result. The __BIG_ENDIAN / __LITTLE_ENDIAN
 * constants are defined first if they are missing as well. */
#ifndef __BYTE_ORDER
# ifndef __BIG_ENDIAN
#  define __BIG_ENDIAN	4321
#  define __LITTLE_ENDIAN	1234
# endif
# ifdef WORDS_BIGENDIAN
#  define __BYTE_ORDER __BIG_ENDIAN
# else
#  define __BYTE_ORDER __LITTLE_ENDIAN
# endif
#endif
80 
/* Size in bytes of an MD5 digest, unless something else already defined it. */
#ifndef MD5_DIGEST_SIZE
# define MD5_DIGEST_SIZE 16
#endif

/* Wraps free() so the freed pointer is also reset to NULL.
 * X is expanded twice and is assigned to, so it must be a modifiable lvalue
 * without side effects. The inner "free" is not expanded again (a macro is
 * never re-expanded inside its own replacement), so it names the real
 * library function. */
#define free(X) do { free((X)); (X) = NULL; } while (0)
86 
/* Fallback strdupa() for platforms whose <string.h> does not provide it:
 * copies str, terminator included, into stack memory from alloca().
 * The result is valid until the calling function returns and must not be
 * passed to free(). str is evaluated twice. */
#ifndef strdupa
# define strdupa(str) strcpy(alloca(strlen(str) + 1), str)
#endif

/* Fallback strndupa(): copies at most len bytes of str into a stack buffer
 * of len + 1 bytes and always NUL-terminates it.
 *
 * strncpy() on its own leaves the buffer unterminated whenever
 * strlen(str) >= len, and alloca() memory is uninitialised, so the
 * terminator is written explicitly by the helper below.
 * len is evaluated twice; the result must not be passed to free(). */
#ifndef strndupa
/* Copy at most len bytes of src into dst (len + 1 bytes) and terminate it. */
static inline char *lrz_strndupa_fill(char *dst, const char *src, size_t len)
{
	strncpy(dst, src, len);
	dst[len] = '\0';
	return dst;
}
# define strndupa(str, len) lrz_strndupa_fill(alloca((len) + 1), (str), (len))
#endif
94 
95 
/* Legacy short type names. Each is a macro, defined only when no macro of
 * that name exists yet. */
#ifndef uchar
#define uchar unsigned char
#endif

/* int32: the first of int / long / short whose configure-detected size is
 * 4 bytes. If none matches, int32 is left undefined. */
#ifndef int32
#if (SIZEOF_INT == 4)
#define int32 int
#elif (SIZEOF_LONG == 4)
#define int32 long
#elif (SIZEOF_SHORT == 4)
#define int32 short
#endif
#endif

/* int16: int or short, whichever configure reported as 2 bytes. */
#ifndef int16
#if (SIZEOF_INT == 2)
#define int16 int
#elif (SIZEOF_SHORT == 2)
#define int16 short
#endif
#endif

/* The unsigned variants are built textually on the macros above, so they
 * expand to e.g. "unsigned int". */
#ifndef uint32
#define uint32 unsigned int32
#endif

#ifndef uint16
#define uint16 unsigned int16
#endif
125 
/* Smaller of a and b. Whichever argument is selected is evaluated twice,
 * so avoid arguments with side effects. */
#ifndef MIN
#define MIN(a, b) ((a) < (b)? (a): (b))
#endif

/* Larger of a and b. Same double-evaluation caveat as MIN. */
#ifndef MAX
#define MAX(a, b) ((a) > (b)? (a): (b))
#endif
133 
/* Without a native strerror(), index the sys_errlist table directly. */
#if !HAVE_STRERROR
extern char *sys_errlist[];
#define strerror(i) sys_errlist[i]
#endif

/* Declare errno by hand when configure found no <errno.h>. */
#ifndef HAVE_ERRNO_H
extern int errno;
#endif

/* Branch-prediction hints and an "unused" marker. These are defined
 * unconditionally and rely on GCC-compatible builtins and attributes. */
#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)
#define __maybe_unused	__attribute__((unused))

/* On these platforms map ffsll onto the compiler builtin. */
#if defined(__MINGW32__) || defined(__CYGWIN__) || defined(ANDROID) || defined(__APPLE__)
# define ffsll __builtin_ffsll
#endif
150 
/* Short aliases for the fixed-width integer types used throughout. */
typedef int64_t i64;
typedef uint32_t u32;

/* Forward typedefs so the struct tags can be used without the "struct"
 * keyword; the structs themselves are defined further down. */
typedef struct rzip_control rzip_control;
typedef struct md5_ctx md5_ctx;

/* ck specific unnamed semaphore implementations to cope with osx not
 * implementing them. On Apple platforms cksem_t is a struct holding a pair
 * of pipe file descriptors; everywhere else it is simply sem_t. */
#ifdef __APPLE__
struct cksem {
	int pipefd[2];
};

typedef struct cksem cksem_t;
#else
typedef sem_t cksem_t;
#endif
168 
/* Outside Linux, redirect every use of mremap to fake_mremap. fake_mremap
 * is not declared in this header; it must be provided elsewhere. */
#if !defined(__linux)
 #define mremap fake_mremap
#endif

/* MD5_RELIABLE is 0 on Apple platforms and 1 everywhere else.
 * NOTE(review): the reason MD5 is treated as unreliable on Apple is not
 * visible in this header — confirm against the users of this macro. */
#if defined(__APPLE__)
# define MD5_RELIABLE (0)
#else
# define MD5_RELIABLE (1)
#endif
178 
/* Byte-swap helpers.
 *
 * The operand is converted to a fixed-width unsigned type before masking
 * and shifting. Without that, bswap_32() applied to a signed 32-bit value
 * computes (x & 0xff) << 24 in signed int, which shifts into the sign bit
 * and is undefined behaviour. The results have type uint32_t / uint64_t.
 * x is evaluated several times, so it must not have side effects. */
#define bswap_32(x) \
     ((((uint32_t)(x) & 0xff000000u) >> 24) | (((uint32_t)(x) & 0x00ff0000u) >>  8) | \
      (((uint32_t)(x) & 0x0000ff00u) <<  8) | (((uint32_t)(x) & 0x000000ffu) << 24))

# define bswap_64(x) \
     ((((uint64_t)(x) & 0xff00000000000000ull) >> 56)			      \
      | (((uint64_t)(x) & 0x00ff000000000000ull) >> 40)			      \
      | (((uint64_t)(x) & 0x0000ff0000000000ull) >> 24)			      \
      | (((uint64_t)(x) & 0x000000ff00000000ull) >> 8)			      \
      | (((uint64_t)(x) & 0x00000000ff000000ull) << 8)			      \
      | (((uint64_t)(x) & 0x0000000000ff0000ull) << 24)			      \
      | (((uint64_t)(x) & 0x000000000000ff00ull) << 40)			      \
      | (((uint64_t)(x) & 0x00000000000000ffull) << 56))

/* Map alternative little-endian conversion names onto le32toh/le64toh.
 * NOTE(review): "leto32h"/"leto64h" look like a misspelling of the BSD
 * names letoh32/letoh64; left as-is here, confirm before changing. */
#ifdef leto32h
# define le32toh(x) leto32h(x)
# define le64toh(x) leto64h(x)
#endif

/* If the platform did not supply le32toh, provide the host <-> little-endian
 * conversions here: identity on little-endian hosts, byte swap on
 * big-endian hosts, and a hard error for any other byte order. */
#ifndef le32toh
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define htole32(x) (x)
#  define le32toh(x) (x)
#  define htole64(x) (x)
#  define le64toh(x) (x)
# elif __BYTE_ORDER == __BIG_ENDIAN
#  define htole32(x) bswap_32 (x)
#  define le32toh(x) bswap_32 (x)
#  define htole64(x) bswap_64 (x)
#  define le64toh(x) bswap_64 (x)
#else
#error UNKNOWN BYTE ORDER
#endif
#endif
213 
/* Bit positions for the "flags" field of struct rzip_control. Each flag
 * occupies one bit (0..23); the test macros further down check them with
 * control->flags & FLAG_xxx. */
#define FLAG_SHOW_PROGRESS	(1 << 0)
#define FLAG_KEEP_FILES		(1 << 1)
#define FLAG_TEST_ONLY		(1 << 2)
#define FLAG_FORCE_REPLACE	(1 << 3)
#define FLAG_DECOMPRESS		(1 << 4)
#define FLAG_NO_COMPRESS	(1 << 5)
#define FLAG_LZO_COMPRESS	(1 << 6)
#define FLAG_BZIP2_COMPRESS	(1 << 7)
#define FLAG_ZLIB_COMPRESS	(1 << 8)
#define FLAG_ZPAQ_COMPRESS	(1 << 9)
#define FLAG_VERBOSITY		(1 << 10)
#define FLAG_VERBOSITY_MAX	(1 << 11)
#define FLAG_STDIN		(1 << 12)
#define FLAG_STDOUT		(1 << 13)
#define FLAG_INFO		(1 << 14)
#define FLAG_UNLIMITED		(1 << 15)
#define FLAG_HASH		(1 << 16)
#define FLAG_MD5		(1 << 17)
#define FLAG_CHECK		(1 << 18)
#define FLAG_KEEP_BROKEN	(1 << 19)
#define FLAG_THRESHOLD		(1 << 20)
#define FLAG_TMP_OUTBUF		(1 << 21)
#define FLAG_TMP_INBUF		(1 << 22)
#define FLAG_ENCRYPT		(1 << 23)
238 
/* True when neither FLAG_HASH nor FLAG_MD5 is set in control->flags.
 * Like the other flag tests it needs a variable named "control" in scope. */
#define NO_MD5		(!(HASH_CHECK) && !(HAS_MD5))

/* True when long is 4 bytes wide. */
#define BITS32		(sizeof(long) == 4)

/* NOTE(review): presumably the compression-type identifiers recorded for
 * each compressed block — confirm against the stream code. */
#define CTYPE_NONE 3
#define CTYPE_BZIP2 4
#define CTYPE_LZO 5
#define CTYPE_LZMA 6
#define CTYPE_GZIP 7
#define CTYPE_ZPAQ 8

/* Sizes used by the encryption code. SALT_LEN sizes the salt[] array in
 * struct rzip_control; NOTE(review): the others are presumably byte lengths
 * of the password, hash and CBC block — confirm against their users. */
#define PASS_LEN 512
#define HASH_LEN 64
#define SALT_LEN 8
#define CBC_LEN 16

/* Note: this is 1000 * 1024 * 1024 (1000 MiB), not 2^30. */
#define one_g (1000 * 1024 * 1024)

/* Number of online processors. Falls back to 1 when NOTHREAD is defined or
 * the _SC_NPROCESSORS_ONLN macro is not visible at this point; otherwise
 * every use calls sysconf().
 * NOTE(review): this header does not include <unistd.h> itself, so the
 * sysconf branch appears to depend on it having been included earlier. */
#if defined(NOTHREAD) || !defined(_SC_NPROCESSORS_ONLN)
# define PROCESSORS (1)
#else
# define PROCESSORS (sysconf(_SC_NPROCESSORS_ONLN))
#endif

/* System page size via sysconf() when _SC_PAGE_SIZE is visible, else 4096. */
#ifdef _SC_PAGE_SIZE
# define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
#else
# define PAGE_SIZE (4096)
#endif
268 
/* Determine how many times to hash the password when encrypting, based on
 * the date such that we increase the number of loops according to Moore's
 * law relative to when the data is encrypted. It is then stored as a two
 * byte value in the header.
 *
 * The derived macros below call pow(), so any file using them needs
 * <math.h>; this header does not include it. */
#define MOORE 1.835          // world constant  [TIMES per YEAR]
#define ARBITRARY  1000000   // number of sha2 calls per one second in 2011
#define T_ZERO 1293840000    // seconds since epoch in 2011

#define SECONDS_IN_A_YEAR (365*86400)
/* Per-second growth factor: MOORE raised to 1 / SECONDS_IN_A_YEAR. */
#define MOORE_TIMES_PER_SECOND pow (MOORE, 1.0 / SECONDS_IN_A_YEAR)
/* ARBITRARY scaled back from T_ZERO to the epoch (time 0). */
#define ARBITRARY_AT_EPOCH (ARBITRARY * pow (MOORE_TIMES_PER_SECOND, -T_ZERO))
280 
/* Combined masks: either verbosity bit, and every "not LZMA" compressor bit. */
#define FLAG_VERBOSE (FLAG_VERBOSITY | FLAG_VERBOSITY_MAX)
#define FLAG_NOT_LZMA (FLAG_NO_COMPRESS | FLAG_LZO_COMPRESS | FLAG_BZIP2_COMPRESS | FLAG_ZLIB_COMPRESS | FLAG_ZPAQ_COMPRESS)
/* LZMA is in effect when none of the FLAG_NOT_LZMA bits is set. */
#define LZMA_COMPRESS	(!(control->flags & FLAG_NOT_LZMA))

/* Flag tests. Each expands to an expression on a variable named "control"
 * (a pointer to struct rzip_control) that must be in scope at the point of
 * use. The result is the masked bit value (non-zero when set), not
 * necessarily 1. */
#define SHOW_PROGRESS	(control->flags & FLAG_SHOW_PROGRESS)
#define KEEP_FILES	(control->flags & FLAG_KEEP_FILES)
#define TEST_ONLY	(control->flags & FLAG_TEST_ONLY)
#define FORCE_REPLACE	(control->flags & FLAG_FORCE_REPLACE)
#define DECOMPRESS	(control->flags & FLAG_DECOMPRESS)
#define NO_COMPRESS	(control->flags & FLAG_NO_COMPRESS)
#define LZO_COMPRESS	(control->flags & FLAG_LZO_COMPRESS)
#define BZIP2_COMPRESS	(control->flags & FLAG_BZIP2_COMPRESS)
#define ZLIB_COMPRESS	(control->flags & FLAG_ZLIB_COMPRESS)
#define ZPAQ_COMPRESS	(control->flags & FLAG_ZPAQ_COMPRESS)
#define VERBOSE		(control->flags & FLAG_VERBOSE)
#define VERBOSITY	(control->flags & FLAG_VERBOSITY)
#define MAX_VERBOSE	(control->flags & FLAG_VERBOSITY_MAX)
#define STDIN		(control->flags & FLAG_STDIN)
#define STDOUT		(control->flags & FLAG_STDOUT)
#define INFO		(control->flags & FLAG_INFO)
#define UNLIMITED	(control->flags & FLAG_UNLIMITED)
#define HASH_CHECK	(control->flags & FLAG_HASH)
#define HAS_MD5		(control->flags & FLAG_MD5)
#define CHECK_FILE	(control->flags & FLAG_CHECK)
#define KEEP_BROKEN	(control->flags & FLAG_KEEP_BROKEN)
/* Note the name mismatch: LZO_TEST checks FLAG_THRESHOLD. */
#define LZO_TEST	(control->flags & FLAG_THRESHOLD)
#define TMP_OUTBUF	(control->flags & FLAG_TMP_OUTBUF)
#define TMP_INBUF	(control->flags & FLAG_TMP_INBUF)
#define ENCRYPT		(control->flags & FLAG_ENCRYPT)
310 
311 
/* Structure to save state of computation between the single steps.  */
struct md5_ctx
{
	/* Four 32-bit state words. */
	uint32_t A;
	uint32_t B;
	uint32_t C;
	uint32_t D;

	/* NOTE(review): presumably the running input length kept as two
	 * 32-bit halves — confirm against the MD5 implementation. */
	uint32_t total[2];
	/* NOTE(review): presumably the amount of data pending in buffer. */
	uint32_t buflen;
	/* Working buffer of 32 words (128 bytes). */
	uint32_t buffer[32];
};
324 
/* State for a two-window ("low" and "high") buffer over a file descriptor. */
struct sliding_buffer {
	uchar *buf_low;	/* The low window buffer */
	uchar *buf_high;/* The high window buffer */
	i64 orig_offset;/* Where the original buffer started */
	i64 offset_low;	/* The current offset of the low buffer */
	i64 offset_high;/* The current offset of the high buffer */
	i64 offset_search;/* Where the search is up to */
	i64 orig_size;	/* How big the full buffer would be */
	i64 size_low;	/* How big the low buffer is */
	i64 size_high;	/* How big the high buffer is */
	i64 high_length;/* How big the high buffer should be */
	int fd;		/* The fd of the mmap */
};
338 
/* Bundles a checksum slot with a data buffer and its length.
 * NOTE(review): presumably the work item for checksumming buf[0..len) into
 * *cksum — confirm against the code that fills this in. */
struct checksum {
	uint32_t *cksum;	/* Points at the 32-bit checksum value */
	uchar *buf;		/* Data buffer */
	i64 len;		/* Length associated with buf */
};
344 
/* A tag is a 64-bit signed integer. */
typedef i64 tag;

/* Per-run state of the rzip stage: hash table bookkeeping, chunk sizes,
 * file descriptors and running statistics. "struct level" and
 * "struct hash_entry" are declared elsewhere. */
struct rzip_state {
	void *ss;			/* Opaque pointer; its type is not visible here */
	struct level *level;
	tag hash_index[256];		/* 256-entry table of tags */
	struct hash_entry *hash_table;
	char hash_bits;
	i64 hash_count;
	i64 hash_limit;
	tag minimum_tag_mask;
	i64 tag_clean_ptr;
	i64 last_match;
	i64 chunk_size;
	i64 mmap_size;
	char chunk_bytes;
	uint32_t cksum;
	int fd_in, fd_out;
	char stdin_eof;
	/* Counters gathered during the run. */
	struct {
		i64 inserts;
		i64 literals;
		i64 literal_bytes;
		i64 matches;
		i64 match_bytes;
		i64 tag_hits;
		i64 tag_misses;
	} stats;
};
374 
375 struct rzip_control {
376 	char *infile;
377 	FILE *inFILE; // if a FILE is being read from
378 	char *outname;
379 	char *outfile;
380 	FILE *outFILE; // if a FILE is being written to
381 	char *outdir;
382 	char *tmpdir; // when stdin, stdout, or test used
383 	uchar *tmp_outbuf; // Temporary file storage for stdout
384 	i64 out_ofs; // Output offset when tmp_outbuf in use
385 	i64 hist_ofs; // History offset
386 	i64 out_len; // Total length of tmp_outbuf
387 	i64 out_maxlen; // The largest the tmp_outbuf can be used
388 	i64 out_relofs; // Relative tmp_outbuf offset when stdout has been flushed
389 	uchar *tmp_inbuf;
390 	i64 in_ofs;
391 	i64 in_len;
392 	i64 in_maxlen;
393 	FILE *msgout; //stream for output messages
394 	FILE *msgerr; //stream for output errors
395 	char *suffix;
396 	uchar compression_level;
397 	i64 overhead; // compressor overhead
398 	i64 usable_ram; // the most ram we'll try to use on one activity
399 	i64 maxram; // the largest chunk of ram to allocate
400 	unsigned char lzma_properties[5]; // lzma properties, encoded
401 	i64 window;
402 	unsigned long flags;
403 	i64 ramsize;
404 	i64 max_chunk;
405 	i64 max_mmap;
406 	int threads;
407 	char nice_val;		// added for consistency
408 	char major_version;
409 	char minor_version;
410 	i64 st_size;
411 	long page_size;
412 	int fd_in;
413 	int fd_out;
414 	int fd_hist;
415 	i64 encloops;
416 	i64 secs;
417 	void (*pass_cb)(void *, char *, size_t); /* callback to get password in lib */
418 	void *pass_data;
419 	uchar salt[SALT_LEN];
420 	uchar *salt_pass;
421 	int salt_pass_len;
422 	uchar *hash;
423 
424 	pthread_mutex_t control_lock;
425 	unsigned char eof;
426 	unsigned char magic_written;
427 	bool lzma_prop_set;
428 
429 	cksem_t cksumsem;
430 	md5_ctx ctx;
431 	uchar md5_resblock[MD5_DIGEST_SIZE];
432 	i64 md5_read; // How far into the file the md5 has done so far
433 	struct checksum checksum;
434 
435 	const char *util_infile;
436 	char delete_infile;
437 	const char *util_outfile;
438 #define STREAM_BUCKET_SIZE 20
439 	size_t sinfo_buckets;
440 	size_t sinfo_idx;
441 	struct stream_info **sinfo_queue;
442 	char delete_outfile;
443 	FILE *outputfile;
444 	char library_mode;
445 	int log_level;
446 	void (*info_cb)(void *data, int pct, int chunk_pct);
447 	void *info_data;
448 	void (*log_cb)(void *data, unsigned int level, unsigned int line, const char *file, const char *func, const char *format, va_list args);
449 	void *log_data;
450 
451 	char chunk_bytes;
452 	struct sliding_buffer sb;
453 	void (*do_mcpy)(rzip_control *, unsigned char *, i64, i64);
454 	void (*next_tag)(rzip_control *, struct rzip_state *, i64, tag *);
455 	tag (*full_tag)(rzip_control *, struct rzip_state *, i64);
456 	i64 (*match_len)(rzip_control *, struct rzip_state *, i64, i64, i64, i64 *);
457 };
458 
/* State of a single stream: its data buffer, position within it, and the
 * thread bookkeeping attached to it. */
struct stream {
	i64 last_head;
	uchar *buf;		/* Data buffer */
	i64 buflen;		/* Length associated with buf */
	i64 bufp;		/* NOTE(review): presumably the current position within buf */
	uchar eos;		/* NOTE(review): presumably an end-of-stream marker */
	long uthread_no;
	long unext_thread;
	long base_thread;
	int total_threads;
	i64 last_headofs;
};
471 
/* Describes a group of streams sharing one file descriptor, together with
 * position, size and thread bookkeeping for the group. */
struct stream_info {
	struct stream *s;	/* The streams; num_streams gives the count */
	uchar num_streams;
	int fd;
	i64 bufsize;
	i64 cur_pos;
	i64 initial_pos;
	i64 total_read;
	i64 ram_alloced;
	i64 size;
	long thread_no;
	long next_thread;
	int chunks;
	char chunk_bytes;
};
487 
print_stuff(const rzip_control * control,int level,unsigned int line,const char * file,const char * func,const char * format,...)488 static inline void print_stuff(const rzip_control *control, int level, unsigned int line, const char *file, const char *func, const char *format, ...)
489 {
490 	va_list ap;
491 	if (control->library_mode && control->log_cb && (control->log_level >= level)) {
492 		va_start(ap, format);
493 		control->log_cb(control->log_data, level, line, file, func, format, ap);
494 		va_end(ap);
495 	} else if (control->msgout) {
496 		va_start(ap, format);
497 		vfprintf(control->msgout, format, ap);
498 		va_end(ap);
499 		fflush(control->msgout);
500 	}
501 }
502 
print_err(const rzip_control * control,unsigned int line,const char * file,const char * func,const char * format,...)503 static inline void print_err(const rzip_control *control, unsigned int line, const char *file, const char *func, const char *format, ...)
504 {
505 	va_list ap;
506 	if (control->library_mode && control->log_cb && (control->log_level >= 0)) {
507 		va_start(ap, format);
508 		control->log_cb(control->log_data, 0, line, file, func, format, ap);
509 		va_end(ap);
510 	} else if (control->msgerr) {
511 		va_start(ap, format);
512 		vfprintf(control->msgerr, format, ap);
513 		va_end(ap);
514 		fflush(control->msgerr);
515 	}
516 }
517 
/* Convenience wrappers around the inline print_stuff()/print_err()
 * functions. Each one supplies the "control" variable that must be in scope
 * at the call site, plus __LINE__, __FILE__ and __func__. The macros reuse
 * the function names; because a macro is not re-expanded inside its own
 * replacement, the inner call reaches the function. */
#define print_stuff(level, ...) do { \
	print_stuff(control, level, __LINE__, __FILE__, __func__, __VA_ARGS__); \
} while (0)

/* Unconditional output at level 1. */
#define print_output(...) do { \
	print_stuff(1, __VA_ARGS__); \
} while (0)

/* Level 2, only when SHOW_PROGRESS is set. */
#define print_progress(...) do { \
	if (SHOW_PROGRESS) { \
		print_stuff(2, __VA_ARGS__); \
	} \
} while (0)

/* Level 3, only when VERBOSE is set. */
#define print_verbose(...) do { \
	if (VERBOSE) { \
		print_stuff(3, __VA_ARGS__); \
	} \
} while (0)

/* Level 4, only when MAX_VERBOSE is set. */
#define print_maxverbose(...) do { \
	if (MAX_VERBOSE) { \
		print_stuff(4, __VA_ARGS__); \
	} \
} while (0)

/* Error output through print_err() with call-site information. */
#define print_err(...) do { \
	print_err(control, __LINE__, __FILE__, __func__, __VA_ARGS__); \
} while (0)
544 #endif
545