1 /*-
2 * Copyright (c) 2018 Grzegorz Antoniak (http://antoniak.org)
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 
26 #include "archive_platform.h"
27 #include "archive_endian.h"
28 
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <time.h>
33 #ifdef HAVE_ZLIB_H
34 #include <zlib.h> /* crc32 */
35 #endif
36 #ifdef HAVE_LIMITS_H
37 #include <limits.h>
38 #endif
39 
40 #include "archive.h"
41 #ifndef HAVE_ZLIB_H
42 #include "archive_crc32.h"
43 #endif
44 
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_ppmd7_private.h"
48 #include "archive_entry_private.h"
49 
50 #ifdef HAVE_BLAKE2_H
51 #include <blake2.h>
52 #else
53 #include "archive_blake2.h"
54 #endif
55 
56 /*#define CHECK_CRC_ON_SOLID_SKIP*/
57 /*#define DONT_FAIL_ON_CRC_ERROR*/
58 /*#define DEBUG*/
59 
/* Small helper macros.
 *
 * rar5_min / rar5_max yield the smaller / larger of the two arguments. Both
 * arguments are evaluated more than once, so don't pass expressions that have
 * side effects.
 *
 * rar5_countof yields the number of elements of a real array (not of a
 * pointer) as a ssize_t. The argument is fully parenthesized so that any
 * array-valued expression can be passed. */
#define rar5_min(a, b) (((a) > (b)) ? (b) : (a))
#define rar5_max(a, b) (((a) > (b)) ? (a) : (b))
#define rar5_countof(X) ((const ssize_t) (sizeof(X) / sizeof(*(X))))
63 
/* Debug helpers. DEBUG_CODE is meant to prefix a statement or block: with
 * DEBUG defined the block always runs (`if(1)`), otherwise it never runs
 * (`if(0)`). LOG() prints its printf-style arguments prefixed with "rar5: "
 * and followed by a newline; note that LOG is defined only when DEBUG is
 * defined. */
#if defined DEBUG
#define DEBUG_CODE if(1)
#define LOG(...) do { printf("rar5: " __VA_ARGS__); puts(""); } while(0)
#else
#define DEBUG_CODE if(0)
#endif
70 
71 /* Real RAR5 magic number is:
72  *
73  * 0x52, 0x61, 0x72, 0x21, 0x1a, 0x07, 0x01, 0x00
74  * "Rar!→•☺·\x00"
75  *
76  * Retrieved with `rar5_signature()` by XOR'ing it with 0xA1, because I don't
77  * want to put this magic sequence in each binary that uses libarchive, so
78  * applications that scan through the file for this marker won't trigger on
79  * this "false" one.
80  *
81  * The array itself is decrypted in `rar5_init` function. */
82 
/* The 8 signature bytes, each XOR'ed with 0xA1 (for example
 * 243 ^ 0xA1 == 0x52, the letter 'R'). */
static unsigned char rar5_signature_xor[] = { 243, 192, 211, 128, 187, 166, 160, 161 };
/* NOTE(review): not used in this part of the file; presumably a size of the
 * unpack window in bytes (0x20000 == 128 KiB) -- confirm at the use site. */
static const size_t g_unpack_window_size = 0x20000;

/* These could have been static const's, but they aren't, because of
 * Visual Studio. */
#define MAX_NAME_IN_CHARS 2048
#define MAX_NAME_IN_BYTES (4 * MAX_NAME_IN_CHARS)
90 
/* State that belongs to a single file entry. reset_file_context() zeroes the
 * whole structure and then initializes `b2state` for a 32-byte digest. */
struct file_header {
	/* NOTE(review): the size fields below are not used in this part of
	 * the file; presumably the amount of data still to be processed and
	 * the size of the entry after unpacking -- confirm at the use
	 * sites. */
	ssize_t bytes_remaining;
	ssize_t unpacked_size;
	int64_t last_offset;         /* Used in sanity checks. */
	int64_t last_size;           /* Used in sanity checks. */

	uint8_t solid : 1;           /* Is this a solid stream? */
	uint8_t service : 1;         /* Is this file a service data? */
	uint8_t eof : 1;             /* Did we finish unpacking the file? */
	uint8_t dir : 1;             /* Is this file entry a directory? */

	/* Optional time fields. */
	uint64_t e_mtime;
	uint64_t e_ctime;
	uint64_t e_atime;
	uint32_t e_unix_ns;

	/* Optional hash fields. */
	uint32_t stored_crc32;
	uint32_t calculated_crc32;
	uint8_t blake2sp[32];
	blake2sp_state b2state;      /* Set up by blake2sp_init() in
	                                reset_file_context(). */
	char has_blake2;

	/* Optional redir fields */
	uint64_t redir_type;         /* Reset to REDIR_TYPE_NONE by
	                                reset_file_context(). */
	uint64_t redir_flags;        /* Reset to 0 by reset_file_context(). */

	ssize_t solid_window_size; /* Used in file format check. */
};
121 
/* NOTE(review): these constants are not used in this part of the file.
 * Judging by the names they identify the kinds of optional "extra" records
 * (encryption, hash, high precision time, version, redirection, Unix owner,
 * sub-data) -- confirm against the header parsing code. */
enum EXTRA {
	EX_CRYPT = 0x01,
	EX_HASH = 0x02,
	EX_HTIME = 0x03,
	EX_VERSION = 0x04,
	EX_REDIR = 0x05,
	EX_UOWNER = 0x06,
	EX_SUBDATA = 0x07
};
131 
/* NOTE(review): presumably a bit tested against file_header.redir_flags --
 * confirm at the use site. */
#define REDIR_SYMLINK_IS_DIR	1

/* Values of file_header.redir_type. reset_file_context() resets that field
 * to REDIR_TYPE_NONE. */
enum REDIR_TYPE {
	REDIR_TYPE_NONE = 0,
	REDIR_TYPE_UNIXSYMLINK = 1,
	REDIR_TYPE_WINSYMLINK = 2,
	REDIR_TYPE_JUNCTION = 3,
	REDIR_TYPE_HARDLINK = 4,
	REDIR_TYPE_FILECOPY = 5,
};
142 
/* NOTE(review): not used in this part of the file. The first four values are
 * distinct single bits, so they look like flags describing which owner
 * fields are present; OWNER_MAXNAMELEN looks like a name length limit --
 * confirm at the use sites. */
#define	OWNER_USER_NAME		0x01
#define	OWNER_GROUP_NAME	0x02
#define	OWNER_USER_UID		0x04
#define	OWNER_GROUP_GID		0x08
#define	OWNER_MAXNAMELEN	256
148 
/* Values of filter_info.type. run_filter() implements FILTER_DELTA,
 * FILTER_E8, FILTER_E8E9 and FILTER_ARM; every other value is reported as
 * an unsupported filter type. */
enum FILTER_TYPE {
	FILTER_DELTA = 0,   /* Generic pattern. */
	FILTER_E8    = 1,   /* Intel x86 code. */
	FILTER_E8E9  = 2,   /* Intel x86 code. */
	FILTER_ARM   = 3,   /* ARM code. */
	FILTER_AUDIO = 4,   /* Audio filter, not used in RARv5. */
	FILTER_RGB   = 5,   /* Color palette, not used in RARv5. */
	FILTER_ITANIUM = 6, /* Intel's Itanium, not used in RARv5. */
	FILTER_PPM   = 7,   /* Predictive pattern matching, not used in
			       RARv5. */
	FILTER_NONE  = 8,
};
161 
/* Descriptor of a single filter. Descriptors are allocated by
 * add_new_filter(), kept in comp_state.filters and freed after use by
 * apply_filters() or by free_filters(). */
struct filter_info {
	int type;              /* One of enum FILTER_TYPE; selects the
	                          implementation in run_filter(). */
	int channels;          /* Channel count used by run_delta_filter(). */
	int pos_r;             /* Not used in this part of the file. */

	int64_t block_start;   /* Position of the first byte covered by the
	                          filter, in the same units as
	                          comp_state.write_ptr. */
	ssize_t block_length;  /* Number of bytes covered by the filter; also
	                          the size of the buffer run_filter()
	                          allocates. */
	uint16_t width;        /* Not used in this part of the file. */
};
171 
/* One slot of the comp_state.dready array ("Data buffer stack").
 * NOTE(review): the slots are filled by push_data_ready(), which is not part
 * of this chunk; the field meanings below are inferred from its arguments
 * (buf, size, offset) -- confirm there. */
struct data_ready {
	char used;             /* Presumably non-zero when the slot is taken. */
	const uint8_t* buf;    /* Data to hand over. */
	size_t size;           /* Number of bytes in `buf`. */
	int64_t offset;        /* Offset associated with the data. */
};
178 
/* Circular deque of pointer-sized values, managed by the cdeque_* functions.
 * All positions and the element count are 16-bit wide. */
struct cdeque {
	uint16_t beg_pos;   /* Index of the front (oldest) element in `arr`. */
	uint16_t end_pos;   /* Index where the next pushed element is stored. */
	uint16_t cap_mask;  /* Capacity minus one; indices are wrapped by
	                       AND-ing with this mask. */
	uint16_t size;      /* Number of elements currently stored. */
	size_t* arr;        /* Backing array; items are pointers stored as
	                       size_t values. */
};
186 
/* A single decoding table; comp_state holds five of them (bd, ld, dd, ldd,
 * rd). NOTE(review): the code that builds and reads these tables is not part
 * of this chunk, so the individual fields are left undescribed here.
 * `decode_num` has 306 entries, which is the same value as HUFF_NC. */
struct decode_table {
	uint32_t size;
	int32_t decode_len[16];
	uint32_t decode_pos[16];
	uint32_t quick_bits;
	uint8_t quick_len[1 << 10];
	uint16_t quick_num[1 << 10];
	uint16_t decode_num[306];
};
196 
/* Decompression state: the window buffer and its counters, the decoding
 * tables, the pending filters and the data ready to be handed over. */
struct comp_state {
	/* Flag used to specify if unpacker needs to reinitialize the
	   uncompression context. */
	uint8_t initialized : 1;

	/* Flag used when applying filters. apply_filters() clears it on entry
	   and sets it when no filter is ready to be processed. */
	uint8_t all_filters_applied : 1;

	/* Flag used to skip file context reinitialization, used when unpacker
	   is skipping through different multivolume archives. */
	uint8_t switch_multivolume : 1;

	/* Flag used to specify if unpacker has processed the whole data block
	   or just a part of it. */
	uint8_t block_parsing_finished : 1;

	signed int notused : 4;

	int flags;                   /* Uncompression flags. */
	int method;                  /* Uncompression algorithm method. */
	int version;                 /* Uncompression algorithm version. */
	ssize_t window_size;         /* Size of window_buf. */
	uint8_t* window_buf;         /* Circular buffer used during
	                                decompression. */
	uint8_t* filtered_buf;       /* Buffer used when applying filters.
	                                Reallocated by every run_filter()
	                                call. */
	const uint8_t* block_buf;    /* Buffer used when merging blocks. */
	size_t window_mask;          /* Convenience field; window_size - 1. */
	int64_t write_ptr;           /* This amount of data has been unpacked
					in the window buffer. */
	int64_t last_write_ptr;      /* This amount of data has been stored in
	                                the output file. */
	int64_t last_unstore_ptr;    /* Counter of bytes extracted during
	                                unstoring. This is separate from
	                                last_write_ptr because of how SERVICE
	                                base blocks are handled during skipping
	                                in solid multiarchive archives. */
	int64_t solid_offset;        /* Additional offset inside the window
	                                buffer, used in unpacking solid
	                                archives. */
	ssize_t cur_block_size;      /* Size of current data block. */
	int last_len;                /* Flag used in lzss decompression. */

	/* Decode tables used during lzss uncompression. */

#define HUFF_BC 20
	struct decode_table bd;      /* huffman bit lengths */
#define HUFF_NC 306
	struct decode_table ld;      /* literals */
#define HUFF_DC 64
	struct decode_table dd;      /* distances */
#define HUFF_LDC 16
	struct decode_table ldd;     /* lower bits of distances */
#define HUFF_RC 44
	struct decode_table rd;      /* repeating distances */
#define HUFF_TABLE_SIZE (HUFF_NC + HUFF_DC + HUFF_RC + HUFF_LDC)

	/* Circular deque for storing filters. It holds pointers to
	 * heap-allocated `struct filter_info` descriptors. */
	struct cdeque filters;
	int64_t last_block_start;    /* Used for sanity checking. */
	ssize_t last_block_length;   /* Used for sanity checking. */

	/* Distance cache used during lzss uncompression. Index 0 holds the
	 * most recently pushed or touched distance. */
	int dist_cache[4];

	/* Data buffer stack. */
	struct data_ready dready[2];
};
264 
/* Bit reader state: a position inside the input expressed as a byte index
 * plus a bit index inside that byte. */
struct bit_reader {
	int8_t bit_addr;    /* Current bit pointer inside current byte. */
	int in_addr;        /* Current byte pointer. */
};
270 
/* RARv5 block header structure. Use bf_* functions to get values from
 * block_flags_u8 field. I.e. bf_byte_count, etc. */
struct compressed_block_header {
	/* block_flags_u8 contain fields encoded in little-endian bitfield:
	 *
	 * - table present flag (shr 7, and 1),  see bf_is_table_present()
	 * - last block flag    (shr 6, and 1),  see bf_is_last_block()
	 * - byte_count         (shr 3, and 7),  see bf_byte_count()
	 * - bit_size           (shr 0, and 7).  see bf_bit_size()
	 */
	uint8_t block_flags_u8;
	/* NOTE(review): presumably the checksum byte of the block header --
	 * it is not used in this part of the file, confirm at the use
	 * site. */
	uint8_t block_cksum;
};
284 
/* RARv5 main header structure. */
struct main_header {
	/* Does the archive contain solid streams? When set,
	 * reset_file_context() accumulates comp_state.solid_offset instead
	 * of resetting it. */
	uint8_t solid : 1;

	/* Is this a multi-file archive? */
	uint8_t volume : 1;
	/* NOTE(review): not used in this part of the file; presumably an
	 * "end of archive" marker -- confirm at the use site. */
	uint8_t endarc : 1;
	uint8_t notused : 5;

	/* NOTE(review): presumably the number of this volume -- confirm at
	 * the use site. */
	unsigned int vol_no;
};
297 
/* NOTE(review): this structure is not used in this part of the file. Judging
 * by the names it keeps data common to all header kinds: two flags telling
 * whether the data is split across volumes (before / after this one), a
 * size, and the id of the last header that was read -- confirm against the
 * header parsing code. */
struct generic_header {
	uint8_t split_after : 1;
	uint8_t split_before : 1;
	uint8_t padding : 6;
	int size;
	int last_header_id;
};
305 
/* NOTE(review): this structure is not used in this part of the file.
 * Judging by the names it keeps multi-volume state: the volume number that
 * is expected next and a heap buffer -- confirm ownership of `push_buf` at
 * the use sites. */
struct multivolume {
	unsigned int expected_vol_no;
	uint8_t* push_buf;
};
310 
/* Main context structure. get_context() retrieves it from
 * `a->format->data`. */
struct rar5 {
	int header_initialized;

	/* Set to 1 if current file is positioned AFTER the magic value
	 * of the archive file. This is used in header reading functions. */
	int skipped_magic;

	/* Set to not zero if we're in skip mode (either by calling
	 * rar5_data_skip function or when skipping over solid streams).
	 * Set to 0 when in extraction mode. This is used during checksum
	 * calculation functions. */
	int skip_mode;

	/* Set to not zero if we're in block merging mode (i.e. when switching
	 * to another file in multivolume archive, last block from 1st archive
	 * needs to be merged with 1st block from 2nd archive). This flag
	 * guards against recursive use of the merging function, which doesn't
	 * support recursive calls. */
	int merge_mode;

	/* An offset to QuickOpen list. This is not supported by this unpacker,
	 * because we're focusing on streaming interface. QuickOpen is designed
	 * to make things quicker for non-stream interfaces, so it's not our
	 * use case. */
	uint64_t qlist_offset;

	/* An offset to additional Recovery data. This is not supported by this
	 * unpacker. Recovery data are additional Reed-Solomon codes that could
	 * be used to calculate bytes that are missing in archive or are
	 * corrupted. */
	uint64_t rr_offset;

	/* Various context variables grouped to different structures. */
	struct generic_header generic;
	struct main_header main;
	struct comp_state cstate;
	struct file_header file;
	struct bit_reader bits;
	struct multivolume vol;

	/* The header of currently processed RARv5 block. Used in main
	 * decompression logic loop. */
	struct compressed_block_header last_block_hdr;
};
356 
/* Forward function declarations. These functions are defined further down in
 * the file. Of the four, only push_data_ready() is called in the code above
 * their definitions shown here: it receives a buffer, its size and an offset,
 * and run_filter() treats any result other than ARCHIVE_OK as a failure. */

static void rar5_signature(char *buf);
static int verify_global_checksums(struct archive_read* a);
static int rar5_read_data_skip(struct archive_read *a);
static int push_data_ready(struct archive_read* a, struct rar5* rar,
	const uint8_t* buf, size_t size, int64_t offset);
364 
/* CDE_xxx = Circular Double Ended (Queue) return values.
 *
 * CDE_OK            - the operation succeeded,
 * CDE_ALLOC         - the backing array couldn't be allocated,
 * CDE_PARAM         - an argument was NULL or otherwise not acceptable,
 * CDE_OUT_OF_BOUNDS - the deque was empty (front/pop) or full (push). */
enum CDE_RETURN_VALUES {
	CDE_OK, CDE_ALLOC, CDE_PARAM, CDE_OUT_OF_BOUNDS,
};
369 
370 /* Clears the contents of this circular deque. */
cdeque_clear(struct cdeque * d)371 static void cdeque_clear(struct cdeque* d) {
372 	d->size = 0;
373 	d->beg_pos = 0;
374 	d->end_pos = 0;
375 }
376 
377 /* Creates a new circular deque object. Capacity must be power of 2: 8, 16, 32,
378  * 64, 256, etc. When the user will add another item above current capacity,
379  * the circular deque will overwrite the oldest entry. */
cdeque_init(struct cdeque * d,int max_capacity_power_of_2)380 static int cdeque_init(struct cdeque* d, int max_capacity_power_of_2) {
381 	if(d == NULL || max_capacity_power_of_2 == 0)
382 		return CDE_PARAM;
383 
384 	d->cap_mask = max_capacity_power_of_2 - 1;
385 	d->arr = NULL;
386 
387 	if((max_capacity_power_of_2 & d->cap_mask) != 0)
388 		return CDE_PARAM;
389 
390 	cdeque_clear(d);
391 	d->arr = malloc(sizeof(void*) * max_capacity_power_of_2);
392 
393 	return d->arr ? CDE_OK : CDE_ALLOC;
394 }
395 
396 /* Return the current size (not capacity) of circular deque `d`. */
cdeque_size(struct cdeque * d)397 static size_t cdeque_size(struct cdeque* d) {
398 	return d->size;
399 }
400 
401 /* Returns the first element of current circular deque. Note that this function
402  * doesn't perform any bounds checking. If you need bounds checking, use
403  * `cdeque_front()` function instead. */
cdeque_front_fast(struct cdeque * d,void ** value)404 static void cdeque_front_fast(struct cdeque* d, void** value) {
405 	*value = (void*) d->arr[d->beg_pos];
406 }
407 
408 /* Returns the first element of current circular deque. This function
409  * performs bounds checking. */
cdeque_front(struct cdeque * d,void ** value)410 static int cdeque_front(struct cdeque* d, void** value) {
411 	if(d->size > 0) {
412 		cdeque_front_fast(d, value);
413 		return CDE_OK;
414 	} else
415 		return CDE_OUT_OF_BOUNDS;
416 }
417 
418 /* Pushes a new element into the end of this circular deque object. If current
419  * size will exceed capacity, the oldest element will be overwritten. */
cdeque_push_back(struct cdeque * d,void * item)420 static int cdeque_push_back(struct cdeque* d, void* item) {
421 	if(d == NULL)
422 		return CDE_PARAM;
423 
424 	if(d->size == d->cap_mask + 1)
425 		return CDE_OUT_OF_BOUNDS;
426 
427 	d->arr[d->end_pos] = (size_t) item;
428 	d->end_pos = (d->end_pos + 1) & d->cap_mask;
429 	d->size++;
430 
431 	return CDE_OK;
432 }
433 
434 /* Pops a front element of this circular deque object and returns its value.
435  * This function doesn't perform any bounds checking. */
cdeque_pop_front_fast(struct cdeque * d,void ** value)436 static void cdeque_pop_front_fast(struct cdeque* d, void** value) {
437 	*value = (void*) d->arr[d->beg_pos];
438 	d->beg_pos = (d->beg_pos + 1) & d->cap_mask;
439 	d->size--;
440 }
441 
442 /* Pops a front element of this circular deque object and returns its value.
443  * This function performs bounds checking. */
cdeque_pop_front(struct cdeque * d,void ** value)444 static int cdeque_pop_front(struct cdeque* d, void** value) {
445 	if(!d || !value)
446 		return CDE_PARAM;
447 
448 	if(d->size == 0)
449 		return CDE_OUT_OF_BOUNDS;
450 
451 	cdeque_pop_front_fast(d, value);
452 	return CDE_OK;
453 }
454 
/* Converts a `struct filter_info**` into the `void**` expected by the cdeque
 * functions that return an item through an output argument. The address is
 * passed through a size_t, it is not changed. */
static void** cdeque_filter_p(struct filter_info** f) {
	const size_t raw_address = (size_t) f;

	return (void**) raw_address;
}
459 
/* Converts a `struct filter_info*` into the `void*` item type expected by
 * cdeque_push_back(). The address is passed through a size_t, it is not
 * changed. The cast now names the actual return type (`void*`) instead of
 * `void**`. */
static void* cdeque_filter(struct filter_info* f) {
	return (void*) (size_t) f;
}
464 
465 /* Destroys this circular deque object. Deallocates the memory of the
466  * collection buffer, but doesn't deallocate the memory of any pointer passed
467  * to this deque as a value. */
cdeque_free(struct cdeque * d)468 static void cdeque_free(struct cdeque* d) {
469 	if(!d)
470 		return;
471 
472 	if(!d->arr)
473 		return;
474 
475 	free(d->arr);
476 
477 	d->arr = NULL;
478 	d->beg_pos = -1;
479 	d->end_pos = -1;
480 	d->cap_mask = 0;
481 }
482 
483 static inline
bf_bit_size(const struct compressed_block_header * hdr)484 uint8_t bf_bit_size(const struct compressed_block_header* hdr) {
485 	return hdr->block_flags_u8 & 7;
486 }
487 
488 static inline
bf_byte_count(const struct compressed_block_header * hdr)489 uint8_t bf_byte_count(const struct compressed_block_header* hdr) {
490 	return (hdr->block_flags_u8 >> 3) & 7;
491 }
492 
493 static inline
bf_is_table_present(const struct compressed_block_header * hdr)494 uint8_t bf_is_table_present(const struct compressed_block_header* hdr) {
495 	return (hdr->block_flags_u8 >> 7) & 1;
496 }
497 
498 static inline
bf_is_last_block(const struct compressed_block_header * hdr)499 uint8_t bf_is_last_block(const struct compressed_block_header* hdr) {
500 	return (hdr->block_flags_u8 >> 6) & 1;
501 }
502 
get_context(struct archive_read * a)503 static inline struct rar5* get_context(struct archive_read* a) {
504 	return (struct rar5*) a->format->data;
505 }
506 
507 /* Convenience functions used by filter implementations. */
/* Copies the range [start, end) of the circular buffer `window` into the
 * linear buffer `dst`. `mask` is the window size minus one (the window size
 * is a power of 2), and both offsets are reduced with it.
 *
 * When the reduced start offset is bigger than the reduced end offset, the
 * range wraps around the end of the window and is copied in two parts.
 * Otherwise `end - start` bytes are copied in one go. */
static void circular_memcpy(uint8_t* dst, uint8_t* window, const uint64_t mask,
    int64_t start, int64_t end)
{
	const uint64_t from = start & mask;
	const uint64_t to = end & mask;

	if(from <= to) {
		/* Not wrapped: one contiguous copy is enough. */
		memcpy(dst, window + from, (size_t) (end - start));
		return;
	}

	{
		/* Wrapped: the tail of the window first, then its head. */
		const ssize_t tail_len = mask + 1 - from;
		const ssize_t head_len = to;

		memcpy(dst, window + from, tail_len);
		memcpy(dst + tail_len, window, head_len);
	}
}
521 
read_filter_data(struct rar5 * rar,uint32_t offset)522 static uint32_t read_filter_data(struct rar5* rar, uint32_t offset) {
523 	uint8_t linear_buf[4];
524 	circular_memcpy(linear_buf, rar->cstate.window_buf,
525 	    rar->cstate.window_mask, offset, offset + 4);
526 	return archive_le32dec(linear_buf);
527 }
528 
write_filter_data(struct rar5 * rar,uint32_t offset,uint32_t value)529 static void write_filter_data(struct rar5* rar, uint32_t offset,
530     uint32_t value)
531 {
532 	archive_le32enc(&rar->cstate.filtered_buf[offset], value);
533 }
534 
535 /* Allocates a new filter descriptor and adds it to the filter array. */
add_new_filter(struct rar5 * rar)536 static struct filter_info* add_new_filter(struct rar5* rar) {
537 	struct filter_info* f =
538 		(struct filter_info*) calloc(1, sizeof(struct filter_info));
539 
540 	if(!f) {
541 		return NULL;
542 	}
543 
544 	cdeque_push_back(&rar->cstate.filters, cdeque_filter(f));
545 	return f;
546 }
547 
run_delta_filter(struct rar5 * rar,struct filter_info * flt)548 static int run_delta_filter(struct rar5* rar, struct filter_info* flt) {
549 	int i;
550 	ssize_t dest_pos, src_pos = 0;
551 
552 	for(i = 0; i < flt->channels; i++) {
553 		uint8_t prev_byte = 0;
554 		for(dest_pos = i;
555 				dest_pos < flt->block_length;
556 				dest_pos += flt->channels)
557 		{
558 			uint8_t byte;
559 
560 			byte = rar->cstate.window_buf[
561 			    (rar->cstate.solid_offset + flt->block_start +
562 			    src_pos) & rar->cstate.window_mask];
563 
564 			prev_byte -= byte;
565 			rar->cstate.filtered_buf[dest_pos] = prev_byte;
566 			src_pos++;
567 		}
568 	}
569 
570 	return ARCHIVE_OK;
571 }
572 
/* Runs the x86 filter described by `flt`.
 *
 * The block covered by the filter is first copied from the window to the
 * filtered buffer. Then the block is scanned in the window for the 0xE8
 * opcode and, when `extended` is not zero, also for the 0xE9 opcode. The
 * 32-bit little-endian operand that follows such an opcode is read from the
 * window and, depending on its value, a rewritten operand is stored in the
 * filtered buffer at the same position relative to the block start.
 *
 * Always returns ARCHIVE_OK. */
static int run_e8e9_filter(struct rar5* rar, struct filter_info* flt,
		int extended)
{
	/* 16 MiB; used both as the modulus for the operand position and as
	 * the bias added to or compared against the operand. */
	const uint32_t file_size = 0x1000000;
	ssize_t i;

	/* Start with an unmodified copy of the block; only the operands
	 * selected below are overwritten. */
	circular_memcpy(rar->cstate.filtered_buf,
	    rar->cstate.window_buf, rar->cstate.window_mask,
	    rar->cstate.solid_offset + flt->block_start,
	    rar->cstate.solid_offset + flt->block_start + flt->block_length);

	/* `i` is advanced inside the body: by one for every scanned byte and
	 * by four more after an operand was examined. The bound keeps the
	 * 4-byte operand inside the block. */
	for(i = 0; i < flt->block_length - 4;) {
		uint8_t b = rar->cstate.window_buf[
		    (rar->cstate.solid_offset + flt->block_start +
		    i++) & rar->cstate.window_mask];

		/*
		 * 0xE8 = x86's call <relative_addr_uint32> (function call)
		 * 0xE9 = x86's jmp <relative_addr_uint32> (unconditional jump)
		 */
		if(b == 0xE8 || (extended && b == 0xE9)) {

			uint32_t addr;
			/* Position of the operand (`i` already points past
			 * the opcode), reduced modulo `file_size`. */
			uint32_t offset = (i + flt->block_start) % file_size;

			addr = read_filter_data(rar,
			    (uint32_t)(rar->cstate.solid_offset +
			    flt->block_start + i) & rar->cstate.window_mask);

			if(addr & 0x80000000) {
				/* Operand with the top bit set: rewritten
				 * only when adding `offset` clears that
				 * bit. */
				if(((addr + offset) & 0x80000000) == 0) {
					write_filter_data(rar, (uint32_t)i,
					    addr + file_size);
				}
			} else {
				/* Operand with the top bit clear: rewritten
				 * only when it is smaller than
				 * `file_size` (the subtraction wraps). */
				if((addr - file_size) & 0x80000000) {
					uint32_t naddr = addr - offset;
					write_filter_data(rar, (uint32_t)i,
					    naddr);
				}
			}

			/* Skip the operand. */
			i += 4;
		}
	}

	return ARCHIVE_OK;
}
621 
/* Runs the ARM filter described by `flt`.
 *
 * The block covered by the filter is first copied from the window to the
 * filtered buffer. Then the block is examined in the window in steps of four
 * bytes; whenever the fourth byte of such a group is 0xEB, the lower 24 bits
 * of the group are rewritten in the filtered buffer.
 *
 * Always returns ARCHIVE_OK. */
static int run_arm_filter(struct rar5* rar, struct filter_info* flt) {
	ssize_t i = 0;
	uint32_t offset;

	/* Start with an unmodified copy of the block; only the selected
	 * 4-byte groups are overwritten below. */
	circular_memcpy(rar->cstate.filtered_buf,
	    rar->cstate.window_buf, rar->cstate.window_mask,
	    rar->cstate.solid_offset + flt->block_start,
	    rar->cstate.solid_offset + flt->block_start + flt->block_length);

	for(i = 0; i < flt->block_length - 3; i += 4) {
		/* Fourth byte of the current 4-byte group. */
		uint8_t* b = &rar->cstate.window_buf[
		    (rar->cstate.solid_offset +
		    flt->block_start + i + 3) & rar->cstate.window_mask];

		if(*b == 0xEB) {
			/* 0xEB = ARM's BL (branch + link) instruction. */
			offset = read_filter_data(rar,
			    (rar->cstate.solid_offset + flt->block_start + i) &
			     (uint32_t)rar->cstate.window_mask) & 0x00ffffff;

			/* Subtract the position of the group, counted in
			 * 4-byte units, from the 24-bit operand and put the
			 * 0xEB byte back on top. */
			offset -= (uint32_t) ((i + flt->block_start) / 4);
			offset = (offset & 0x00ffffff) | 0xeb000000;
			write_filter_data(rar, (uint32_t)i, offset);
		}
	}

	return ARCHIVE_OK;
}
650 
run_filter(struct archive_read * a,struct filter_info * flt)651 static int run_filter(struct archive_read* a, struct filter_info* flt) {
652 	int ret;
653 	struct rar5* rar = get_context(a);
654 
655 	free(rar->cstate.filtered_buf);
656 
657 	rar->cstate.filtered_buf = malloc(flt->block_length);
658 	if(!rar->cstate.filtered_buf) {
659 		archive_set_error(&a->archive, ENOMEM,
660 		    "Can't allocate memory for filter data.");
661 		return ARCHIVE_FATAL;
662 	}
663 
664 	switch(flt->type) {
665 		case FILTER_DELTA:
666 			ret = run_delta_filter(rar, flt);
667 			break;
668 
669 		case FILTER_E8:
670 			/* fallthrough */
671 		case FILTER_E8E9:
672 			ret = run_e8e9_filter(rar, flt,
673 			    flt->type == FILTER_E8E9);
674 			break;
675 
676 		case FILTER_ARM:
677 			ret = run_arm_filter(rar, flt);
678 			break;
679 
680 		default:
681 			archive_set_error(&a->archive,
682 			    ARCHIVE_ERRNO_FILE_FORMAT,
683 			    "Unsupported filter type: 0x%x", flt->type);
684 			return ARCHIVE_FATAL;
685 	}
686 
687 	if(ret != ARCHIVE_OK) {
688 		/* Filter has failed. */
689 		return ret;
690 	}
691 
692 	if(ARCHIVE_OK != push_data_ready(a, rar, rar->cstate.filtered_buf,
693 	    flt->block_length, rar->cstate.last_write_ptr))
694 	{
695 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
696 		    "Stack overflow when submitting unpacked data");
697 
698 		return ARCHIVE_FATAL;
699 	}
700 
701 	rar->cstate.last_write_ptr += flt->block_length;
702 	return ARCHIVE_OK;
703 }
704 
705 /* The `push_data` function submits the selected data range to the user.
706  * Next call of `use_data` will use the pointer, size and offset arguments
707  * that are specified here. These arguments are pushed to the FIFO stack here,
708  * and popped from the stack by the `use_data` function. */
push_data(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,int64_t idx_begin,int64_t idx_end)709 static void push_data(struct archive_read* a, struct rar5* rar,
710     const uint8_t* buf, int64_t idx_begin, int64_t idx_end)
711 {
712 	const uint64_t wmask = rar->cstate.window_mask;
713 	const ssize_t solid_write_ptr = (rar->cstate.solid_offset +
714 	    rar->cstate.last_write_ptr) & wmask;
715 
716 	idx_begin += rar->cstate.solid_offset;
717 	idx_end += rar->cstate.solid_offset;
718 
719 	/* Check if our unpacked data is wrapped inside the window circular
720 	 * buffer.  If it's not wrapped, it can be copied out by using
721 	 * a single memcpy, but when it's wrapped, we need to copy the first
722 	 * part with one memcpy, and the second part with another memcpy. */
723 
724 	if((idx_begin & wmask) > (idx_end & wmask)) {
725 		/* The data is wrapped (begin offset sis bigger than end
726 		 * offset). */
727 		const ssize_t frag1_size = rar->cstate.window_size -
728 		    (idx_begin & wmask);
729 		const ssize_t frag2_size = idx_end & wmask;
730 
731 		/* Copy the first part of the buffer first. */
732 		push_data_ready(a, rar, buf + solid_write_ptr, frag1_size,
733 		    rar->cstate.last_write_ptr);
734 
735 		/* Copy the second part of the buffer. */
736 		push_data_ready(a, rar, buf, frag2_size,
737 		    rar->cstate.last_write_ptr + frag1_size);
738 
739 		rar->cstate.last_write_ptr += frag1_size + frag2_size;
740 	} else {
741 		/* Data is not wrapped, so we can just use one call to copy the
742 		 * data. */
743 		push_data_ready(a, rar,
744 		    buf + solid_write_ptr, (idx_end - idx_begin) & wmask,
745 		    rar->cstate.last_write_ptr);
746 
747 		rar->cstate.last_write_ptr += idx_end - idx_begin;
748 	}
749 }
750 
751 /* Convenience function that submits the data to the user. It uses the
752  * unpack window buffer as a source location. */
push_window_data(struct archive_read * a,struct rar5 * rar,int64_t idx_begin,int64_t idx_end)753 static void push_window_data(struct archive_read* a, struct rar5* rar,
754     int64_t idx_begin, int64_t idx_end)
755 {
756 	push_data(a, rar, rar->cstate.window_buf, idx_begin, idx_end);
757 }
758 
apply_filters(struct archive_read * a)759 static int apply_filters(struct archive_read* a) {
760 	struct filter_info* flt;
761 	struct rar5* rar = get_context(a);
762 	int ret;
763 
764 	rar->cstate.all_filters_applied = 0;
765 
766 	/* Get the first filter that can be applied to our data. The data
767 	 * needs to be fully unpacked before the filter can be run. */
768 	if(CDE_OK == cdeque_front(&rar->cstate.filters,
769 	    cdeque_filter_p(&flt))) {
770 		/* Check if our unpacked data fully covers this filter's
771 		 * range. */
772 		if(rar->cstate.write_ptr > flt->block_start &&
773 		    rar->cstate.write_ptr >= flt->block_start +
774 		    flt->block_length) {
775 			/* Check if we have some data pending to be written
776 			 * right before the filter's start offset. */
777 			if(rar->cstate.last_write_ptr == flt->block_start) {
778 				/* Run the filter specified by descriptor
779 				 * `flt`. */
780 				ret = run_filter(a, flt);
781 				if(ret != ARCHIVE_OK) {
782 					/* Filter failure, return error. */
783 					return ret;
784 				}
785 
786 				/* Filter descriptor won't be needed anymore
787 				 * after it's used, * so remove it from the
788 				 * filter list and free its memory. */
789 				(void) cdeque_pop_front(&rar->cstate.filters,
790 				    cdeque_filter_p(&flt));
791 
792 				free(flt);
793 			} else {
794 				/* We can't run filters yet, dump the memory
795 				 * right before the filter. */
796 				push_window_data(a, rar,
797 				    rar->cstate.last_write_ptr,
798 				    flt->block_start);
799 			}
800 
801 			/* Return 'filter applied or not needed' state to the
802 			 * caller. */
803 			return ARCHIVE_RETRY;
804 		}
805 	}
806 
807 	rar->cstate.all_filters_applied = 1;
808 	return ARCHIVE_OK;
809 }
810 
dist_cache_push(struct rar5 * rar,int value)811 static void dist_cache_push(struct rar5* rar, int value) {
812 	int* q = rar->cstate.dist_cache;
813 
814 	q[3] = q[2];
815 	q[2] = q[1];
816 	q[1] = q[0];
817 	q[0] = value;
818 }
819 
dist_cache_touch(struct rar5 * rar,int idx)820 static int dist_cache_touch(struct rar5* rar, int idx) {
821 	int* q = rar->cstate.dist_cache;
822 	int i, dist = q[idx];
823 
824 	for(i = idx; i > 0; i--)
825 		q[i] = q[i - 1];
826 
827 	q[0] = dist;
828 	return dist;
829 }
830 
free_filters(struct rar5 * rar)831 static void free_filters(struct rar5* rar) {
832 	struct cdeque* d = &rar->cstate.filters;
833 
834 	/* Free any remaining filters. All filters should be naturally
835 	 * consumed by the unpacking function, so remaining filters after
836 	 * unpacking normally mean that unpacking wasn't successful.
837 	 * But still of course we shouldn't leak memory in such case. */
838 
839 	/* cdeque_size() is a fast operation, so we can use it as a loop
840 	 * expression. */
841 	while(cdeque_size(d) > 0) {
842 		struct filter_info* f = NULL;
843 
844 		/* Pop_front will also decrease the collection's size. */
845 		if (CDE_OK == cdeque_pop_front(d, cdeque_filter_p(&f)))
846 			free(f);
847 	}
848 
849 	cdeque_clear(d);
850 
851 	/* Also clear out the variables needed for sanity checking. */
852 	rar->cstate.last_block_start = 0;
853 	rar->cstate.last_block_length = 0;
854 }
855 
reset_file_context(struct rar5 * rar)856 static void reset_file_context(struct rar5* rar) {
857 	memset(&rar->file, 0, sizeof(rar->file));
858 	blake2sp_init(&rar->file.b2state, 32);
859 
860 	if(rar->main.solid) {
861 		rar->cstate.solid_offset += rar->cstate.write_ptr;
862 	} else {
863 		rar->cstate.solid_offset = 0;
864 	}
865 
866 	rar->cstate.write_ptr = 0;
867 	rar->cstate.last_write_ptr = 0;
868 	rar->cstate.last_unstore_ptr = 0;
869 
870 	rar->file.redir_type = REDIR_TYPE_NONE;
871 	rar->file.redir_flags = 0;
872 
873 	free_filters(rar);
874 }
875 
/* Stores `a`, cast to `struct archive_read*`, in `*ar` and checks the state
 * of the archive object.
 *
 * `*ar` is assigned before the check is done. Returns ARCHIVE_OK when the
 * end of the function is reached.
 * NOTE(review): archive_check_magic() is used as a plain statement here;
 * presumably it is a macro that returns from this function with an error
 * code when the magic or the state doesn't match -- confirm in its
 * definition. */
static inline int get_archive_read(struct archive* a,
    struct archive_read** ar)
{
	*ar = (struct archive_read*) a;
	archive_check_magic(a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
	    "archive_read_support_format_rar5");

	return ARCHIVE_OK;
}
885 
/* Requests `how_many` bytes from the reader `a` with __archive_read_ahead()
 * and stores the resulting pointer in `*ptr`.
 *
 * Returns 1 when a pointer was obtained. Returns 0 when `ptr` is NULL or
 * when __archive_read_ahead() returned NULL (`*ptr` is NULL then). The
 * number of available bytes reported by the reader is not used. */
static int read_ahead(struct archive_read* a, size_t how_many,
    const uint8_t** ptr)
{
	ssize_t avail = -1;

	if(ptr == NULL)
		return 0;

	*ptr = __archive_read_ahead(a, how_many, &avail);

	return (*ptr != NULL) ? 1 : 0;
}
900 
consume(struct archive_read * a,int64_t how_many)901 static int consume(struct archive_read* a, int64_t how_many) {
902 	int ret;
903 
904 	ret = how_many == __archive_read_consume(a, how_many)
905 		? ARCHIVE_OK
906 		: ARCHIVE_FATAL;
907 
908 	return ret;
909 }
910 
911 /**
912  * Read a RAR5 variable sized numeric value. This value will be stored in
913  * `pvalue`. The `pvalue_len` argument points to a variable that will receive
914  * the byte count that was consumed in order to decode the `pvalue` value, plus
915  * one.
916  *
917  * pvalue_len is optional and can be NULL.
918  *
919  * NOTE: if `pvalue_len` is NOT NULL, the caller needs to manually consume
920  * the number of bytes that `pvalue_len` value contains. If the `pvalue_len`
921  * is NULL, this consuming operation is done automatically.
922  *
923  * Returns 1 if *pvalue was successfully read.
924  * Returns 0 if there was an error. In this case, *pvalue contains an
925  *           invalid value.
926  */
927 
read_var(struct archive_read * a,uint64_t * pvalue,uint64_t * pvalue_len)928 static int read_var(struct archive_read* a, uint64_t* pvalue,
929     uint64_t* pvalue_len)
930 {
931 	uint64_t result = 0;
932 	size_t shift, i;
933 	const uint8_t* p;
934 	uint8_t b;
935 
936 	/* We will read maximum of 8 bytes. We don't have to handle the
937 	 * situation to read the RAR5 variable-sized value stored at the end of
938 	 * the file, because such situation will never happen. */
939 	if(!read_ahead(a, 8, &p))
940 		return 0;
941 
942 	for(shift = 0, i = 0; i < 8; i++, shift += 7) {
943 		b = p[i];
944 
945 		/* Strip the MSB from the input byte and add the resulting
946 		 * number to the `result`. */
947 		result += (b & (uint64_t)0x7F) << shift;
948 
949 		/* MSB set to 1 means we need to continue decoding process.
950 		 * MSB set to 0 means we're done.
951 		 *
952 		 * This conditional checks for the second case. */
953 		if((b & 0x80) == 0) {
954 			if(pvalue) {
955 				*pvalue = result;
956 			}
957 
958 			/* If the caller has passed the `pvalue_len` pointer,
959 			 * store the number of consumed bytes in it and do NOT
960 			 * consume those bytes, since the caller has all the
961 			 * information it needs to perform */
962 			if(pvalue_len) {
963 				*pvalue_len = 1 + i;
964 			} else {
965 				/* If the caller did not provide the
966 				 * `pvalue_len` pointer, it will not have the
967 				 * possibility to advance the file pointer,
968 				 * because it will not know how many bytes it
969 				 * needs to consume. This is why we handle
970 				 * such situation here automatically. */
971 				if(ARCHIVE_OK != consume(a, 1 + i)) {
972 					return 0;
973 				}
974 			}
975 
976 			/* End of decoding process, return success. */
977 			return 1;
978 		}
979 	}
980 
981 	/* The decoded value takes the maximum number of 8 bytes.
982 	 * It's a maximum number of bytes, so end decoding process here
983 	 * even if the first bit of last byte is 1. */
984 	if(pvalue) {
985 		*pvalue = result;
986 	}
987 
988 	if(pvalue_len) {
989 		*pvalue_len = 9;
990 	} else {
991 		if(ARCHIVE_OK != consume(a, 9)) {
992 			return 0;
993 		}
994 	}
995 
996 	return 1;
997 }
998 
/*
 * size_t flavoured front end for read_var().
 *
 * `pvalue` (optional) receives the decoded value, narrowed to size_t, but
 * only when read_var() reported success.
 * `pvalue_len` (optional) always receives the length reported by
 * read_var() narrowed to size_t; it is 0 when read_var() did not report a
 * length. As with read_var(), passing a non-NULL `pvalue_len` means the
 * caller must consume the bytes itself.
 *
 * Returns whatever read_var() returned (1 on success, 0 on failure).
 */
static int read_var_sized(struct archive_read* a, size_t* pvalue,
    size_t* pvalue_len)
{
	uint64_t decoded;
	uint64_t encoded_len = 0;
	int ok;

	if(pvalue_len)
		ok = read_var(a, &decoded, &encoded_len);
	else
		ok = read_var(a, &decoded, NULL);

	if(pvalue && ok == 1) {
		*pvalue = (size_t) decoded;
	}

	if(pvalue_len) {
		/* Narrowing is harmless: the length never exceeds 9. */
		*pvalue_len = (size_t) encoded_len;
	}

	return ok;
}
1019 
read_bits_32(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * value)1020 static int read_bits_32(struct archive_read* a, struct rar5* rar,
1021 	const uint8_t* p, uint32_t* value)
1022 {
1023 	if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1024 		archive_set_error(&a->archive,
1025 			ARCHIVE_ERRNO_PROGRAMMER,
1026 			"Premature end of stream during extraction of data (#1)");
1027 		return ARCHIVE_FATAL;
1028 	}
1029 
1030 	uint32_t bits = ((uint32_t) p[rar->bits.in_addr]) << 24;
1031 	bits |= p[rar->bits.in_addr + 1] << 16;
1032 	bits |= p[rar->bits.in_addr + 2] << 8;
1033 	bits |= p[rar->bits.in_addr + 3];
1034 	bits <<= rar->bits.bit_addr;
1035 	bits |= p[rar->bits.in_addr + 4] >> (8 - rar->bits.bit_addr);
1036 	*value = bits;
1037 	return ARCHIVE_OK;
1038 }
1039 
read_bits_16(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t * value)1040 static int read_bits_16(struct archive_read* a, struct rar5* rar,
1041 	const uint8_t* p, uint16_t* value)
1042 {
1043 	if(rar->bits.in_addr >= rar->cstate.cur_block_size) {
1044 		archive_set_error(&a->archive,
1045 			ARCHIVE_ERRNO_PROGRAMMER,
1046 			"Premature end of stream during extraction of data (#2)");
1047 		return ARCHIVE_FATAL;
1048 	}
1049 
1050 	int bits = (int) ((uint32_t) p[rar->bits.in_addr]) << 16;
1051 	bits |= (int) p[rar->bits.in_addr + 1] << 8;
1052 	bits |= (int) p[rar->bits.in_addr + 2];
1053 	bits >>= (8 - rar->bits.bit_addr);
1054 	*value = bits & 0xffff;
1055 	return ARCHIVE_OK;
1056 }
1057 
skip_bits(struct rar5 * rar,int bits)1058 static void skip_bits(struct rar5* rar, int bits) {
1059 	const int new_bits = rar->bits.bit_addr + bits;
1060 	rar->bits.in_addr += new_bits >> 3;
1061 	rar->bits.bit_addr = new_bits & 7;
1062 }
1063 
1064 /* n = up to 16 */
read_consume_bits(struct archive_read * a,struct rar5 * rar,const uint8_t * p,int n,int * value)1065 static int read_consume_bits(struct archive_read* a, struct rar5* rar,
1066 	const uint8_t* p, int n, int* value)
1067 {
1068 	uint16_t v;
1069 	int ret, num;
1070 
1071 	if(n == 0 || n > 16) {
1072 		/* This is a programmer error and should never happen
1073 		 * in runtime. */
1074 		return ARCHIVE_FATAL;
1075 	}
1076 
1077 	ret = read_bits_16(a, rar, p, &v);
1078 	if(ret != ARCHIVE_OK)
1079 		return ret;
1080 
1081 	num = (int) v;
1082 	num >>= 16 - n;
1083 
1084 	skip_bits(rar, n);
1085 
1086 	if(value)
1087 		*value = num;
1088 
1089 	return ARCHIVE_OK;
1090 }
1091 
read_u32(struct archive_read * a,uint32_t * pvalue)1092 static int read_u32(struct archive_read* a, uint32_t* pvalue) {
1093 	const uint8_t* p;
1094 	if(!read_ahead(a, 4, &p))
1095 		return 0;
1096 
1097 	*pvalue = archive_le32dec(p);
1098 	return ARCHIVE_OK == consume(a, 4) ? 1 : 0;
1099 }
1100 
read_u64(struct archive_read * a,uint64_t * pvalue)1101 static int read_u64(struct archive_read* a, uint64_t* pvalue) {
1102 	const uint8_t* p;
1103 	if(!read_ahead(a, 8, &p))
1104 		return 0;
1105 
1106 	*pvalue = archive_le64dec(p);
1107 	return ARCHIVE_OK == consume(a, 8) ? 1 : 0;
1108 }
1109 
bid_standard(struct archive_read * a)1110 static int bid_standard(struct archive_read* a) {
1111 	const uint8_t* p;
1112 	char signature[sizeof(rar5_signature_xor)];
1113 
1114 	rar5_signature(signature);
1115 
1116 	if(!read_ahead(a, sizeof(rar5_signature_xor), &p))
1117 		return -1;
1118 
1119 	if(!memcmp(signature, p, sizeof(rar5_signature_xor)))
1120 		return 30;
1121 
1122 	return -1;
1123 }
1124 
bid_sfx(struct archive_read * a)1125 static int bid_sfx(struct archive_read *a)
1126 {
1127 	const char *p;
1128 
1129 	if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
1130 		return -1;
1131 
1132 	if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0) {
1133 		/* This is a PE file */
1134 		char signature[sizeof(rar5_signature_xor)];
1135 		ssize_t offset = 0x10000;
1136 		ssize_t window = 4096;
1137 		ssize_t bytes_avail;
1138 
1139 		rar5_signature(signature);
1140 
1141 		while (offset + window <= (1024 * 512)) {
1142 			const char *buff = __archive_read_ahead(a, offset + window, &bytes_avail);
1143 			if (buff == NULL) {
1144 				/* Remaining bytes are less than window. */
1145 				window >>= 1;
1146 				if (window < 0x40)
1147 					return 0;
1148 				continue;
1149 			}
1150 			p = buff + offset;
1151 			while (p + 8 < buff + bytes_avail) {
1152 				if (memcmp(p, signature, sizeof(signature)) == 0)
1153 					return 30;
1154 				p += 0x10;
1155 			}
1156 			offset = p - buff;
1157 		}
1158 	}
1159 
1160 	return 0;
1161 }
1162 
/*
 * Format bidder for RAR5.
 *
 * Declines (-1) right away when `best_bid` already exceeds 30. Otherwise
 * the plain-archive check is tried first and the self-extracting check
 * second; the first non-negative bid wins. Returns -1 when both decline.
 */
static int rar5_bid(struct archive_read* a, int best_bid) {
	int bid;

	if(best_bid > 30)
		return -1;

	bid = bid_standard(a);
	if(bid < 0)
		bid = bid_sfx(a);

	return (bid < 0) ? -1 : bid;
}
1180 
rar5_options(struct archive_read * a,const char * key,const char * val)1181 static int rar5_options(struct archive_read *a, const char *key,
1182     const char *val) {
1183 	(void) a;
1184 	(void) key;
1185 	(void) val;
1186 
1187 	/* No options supported in this version. Return the ARCHIVE_WARN code
1188 	 * to signal the options supervisor that the unpacker didn't handle
1189 	 * setting this option. */
1190 
1191 	return ARCHIVE_WARN;
1192 }
1193 
init_header(struct archive_read * a)1194 static void init_header(struct archive_read* a) {
1195 	a->archive.archive_format = ARCHIVE_FORMAT_RAR_V5;
1196 	a->archive.archive_format_name = "RAR5";
1197 }
1198 
init_window_mask(struct rar5 * rar)1199 static void init_window_mask(struct rar5* rar) {
1200 	if (rar->cstate.window_size)
1201 		rar->cstate.window_mask = rar->cstate.window_size - 1;
1202 	else
1203 		rar->cstate.window_mask = 0;
1204 }
1205 
/* Bit flags carried in the `block_flags` argument of the header parsers. */
enum HEADER_FLAGS {
	HFL_EXTRA_DATA      = 1 << 0, /* an extra-data size field follows */
	HFL_DATA            = 1 << 1, /* a data size field follows */
	HFL_SKIP_IF_UNKNOWN = 1 << 2,
	HFL_SPLIT_BEFORE    = 1 << 3,
	HFL_SPLIT_AFTER     = 1 << 4,
	HFL_CHILD           = 1 << 5,
	HFL_INHERITED       = 1 << 6
};
1215 
process_main_locator_extra_block(struct archive_read * a,struct rar5 * rar)1216 static int process_main_locator_extra_block(struct archive_read* a,
1217     struct rar5* rar)
1218 {
1219 	uint64_t locator_flags;
1220 
1221 	enum LOCATOR_FLAGS {
1222 		QLIST = 0x01, RECOVERY = 0x02,
1223 	};
1224 
1225 	if(!read_var(a, &locator_flags, NULL)) {
1226 		return ARCHIVE_EOF;
1227 	}
1228 
1229 	if(locator_flags & QLIST) {
1230 		if(!read_var(a, &rar->qlist_offset, NULL)) {
1231 			return ARCHIVE_EOF;
1232 		}
1233 
1234 		/* qlist is not used */
1235 	}
1236 
1237 	if(locator_flags & RECOVERY) {
1238 		if(!read_var(a, &rar->rr_offset, NULL)) {
1239 			return ARCHIVE_EOF;
1240 		}
1241 
1242 		/* rr is not used */
1243 	}
1244 
1245 	return ARCHIVE_OK;
1246 }
1247 
parse_file_extra_hash(struct archive_read * a,struct rar5 * rar,ssize_t * extra_data_size)1248 static int parse_file_extra_hash(struct archive_read* a, struct rar5* rar,
1249     ssize_t* extra_data_size)
1250 {
1251 	size_t hash_type = 0;
1252 	size_t value_len;
1253 
1254 	enum HASH_TYPE {
1255 		BLAKE2sp = 0x00
1256 	};
1257 
1258 	if(!read_var_sized(a, &hash_type, &value_len))
1259 		return ARCHIVE_EOF;
1260 
1261 	*extra_data_size -= value_len;
1262 	if(ARCHIVE_OK != consume(a, value_len)) {
1263 		return ARCHIVE_EOF;
1264 	}
1265 
1266 	/* The file uses BLAKE2sp checksum algorithm instead of plain old
1267 	 * CRC32. */
1268 	if(hash_type == BLAKE2sp) {
1269 		const uint8_t* p;
1270 		const int hash_size = sizeof(rar->file.blake2sp);
1271 
1272 		if(!read_ahead(a, hash_size, &p))
1273 			return ARCHIVE_EOF;
1274 
1275 		rar->file.has_blake2 = 1;
1276 		memcpy(&rar->file.blake2sp, p, hash_size);
1277 
1278 		if(ARCHIVE_OK != consume(a, hash_size)) {
1279 			return ARCHIVE_EOF;
1280 		}
1281 
1282 		*extra_data_size -= hash_size;
1283 	} else {
1284 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1285 		    "Unsupported hash type (0x%x)", (int) hash_type);
1286 		return ARCHIVE_FATAL;
1287 	}
1288 
1289 	return ARCHIVE_OK;
1290 }
1291 
/*
 * Converts `win_time` by dividing it by 10,000,000 (integer division) and
 * subtracting 11,644,473,600 from the quotient. The arithmetic is unsigned,
 * so inputs below 116,444,736,000,000,000 wrap around.
 *
 * NOTE(review): presumably `win_time` is a Windows FILETIME (100 ns ticks
 * since 1601-01-01) and the result is seconds since the Unix epoch --
 * confirm against the callers.
 */
static uint64_t time_win_to_unix(uint64_t win_time) {
	const uint64_t ticks_per_sec = 10000000;
	const uint64_t epoch_gap_sec = 11644473600LL;
	const uint64_t whole_seconds = win_time / ticks_per_sec;

	return whole_seconds - epoch_gap_sec;
}
1297 
parse_htime_item(struct archive_read * a,char unix_time,uint64_t * where,ssize_t * extra_data_size)1298 static int parse_htime_item(struct archive_read* a, char unix_time,
1299     uint64_t* where, ssize_t* extra_data_size)
1300 {
1301 	if(unix_time) {
1302 		uint32_t time_val;
1303 		if(!read_u32(a, &time_val))
1304 			return ARCHIVE_EOF;
1305 
1306 		*extra_data_size -= 4;
1307 		*where = (uint64_t) time_val;
1308 	} else {
1309 		uint64_t windows_time;
1310 		if(!read_u64(a, &windows_time))
1311 			return ARCHIVE_EOF;
1312 
1313 		*where = time_win_to_unix(windows_time);
1314 		*extra_data_size -= 8;
1315 	}
1316 
1317 	return ARCHIVE_OK;
1318 }
1319 
parse_file_extra_version(struct archive_read * a,struct archive_entry * e,ssize_t * extra_data_size)1320 static int parse_file_extra_version(struct archive_read* a,
1321     struct archive_entry* e, ssize_t* extra_data_size)
1322 {
1323 	size_t flags = 0;
1324 	size_t version = 0;
1325 	size_t value_len = 0;
1326 	struct archive_string version_string;
1327 	struct archive_string name_utf8_string;
1328 	const char* cur_filename;
1329 
1330 	/* Flags are ignored. */
1331 	if(!read_var_sized(a, &flags, &value_len))
1332 		return ARCHIVE_EOF;
1333 
1334 	*extra_data_size -= value_len;
1335 	if(ARCHIVE_OK != consume(a, value_len))
1336 		return ARCHIVE_EOF;
1337 
1338 	if(!read_var_sized(a, &version, &value_len))
1339 		return ARCHIVE_EOF;
1340 
1341 	*extra_data_size -= value_len;
1342 	if(ARCHIVE_OK != consume(a, value_len))
1343 		return ARCHIVE_EOF;
1344 
1345 	/* extra_data_size should be zero here. */
1346 
1347 	cur_filename = archive_entry_pathname_utf8(e);
1348 	if(cur_filename == NULL) {
1349 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1350 		    "Version entry without file name");
1351 		return ARCHIVE_FATAL;
1352 	}
1353 
1354 	archive_string_init(&version_string);
1355 	archive_string_init(&name_utf8_string);
1356 
1357 	/* Prepare a ;123 suffix for the filename, where '123' is the version
1358 	 * value of this file. */
1359 	archive_string_sprintf(&version_string, ";%zu", version);
1360 
1361 	/* Build the new filename. */
1362 	archive_strcat(&name_utf8_string, cur_filename);
1363 	archive_strcat(&name_utf8_string, version_string.s);
1364 
1365 	/* Apply the new filename into this file's context. */
1366 	archive_entry_update_pathname_utf8(e, name_utf8_string.s);
1367 
1368 	/* Free buffers. */
1369 	archive_string_free(&version_string);
1370 	archive_string_free(&name_utf8_string);
1371 	return ARCHIVE_OK;
1372 }
1373 
parse_file_extra_htime(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,ssize_t * extra_data_size)1374 static int parse_file_extra_htime(struct archive_read* a,
1375     struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size)
1376 {
1377 	char unix_time = 0;
1378 	size_t flags = 0;
1379 	size_t value_len;
1380 
1381 	enum HTIME_FLAGS {
1382 		IS_UNIX       = 0x01,
1383 		HAS_MTIME     = 0x02,
1384 		HAS_CTIME     = 0x04,
1385 		HAS_ATIME     = 0x08,
1386 		HAS_UNIX_NS   = 0x10,
1387 	};
1388 
1389 	if(!read_var_sized(a, &flags, &value_len))
1390 		return ARCHIVE_EOF;
1391 
1392 	*extra_data_size -= value_len;
1393 	if(ARCHIVE_OK != consume(a, value_len)) {
1394 		return ARCHIVE_EOF;
1395 	}
1396 
1397 	unix_time = flags & IS_UNIX;
1398 
1399 	if(flags & HAS_MTIME) {
1400 		parse_htime_item(a, unix_time, &rar->file.e_mtime,
1401 		    extra_data_size);
1402 		archive_entry_set_mtime(e, rar->file.e_mtime, 0);
1403 	}
1404 
1405 	if(flags & HAS_CTIME) {
1406 		parse_htime_item(a, unix_time, &rar->file.e_ctime,
1407 		    extra_data_size);
1408 		archive_entry_set_ctime(e, rar->file.e_ctime, 0);
1409 	}
1410 
1411 	if(flags & HAS_ATIME) {
1412 		parse_htime_item(a, unix_time, &rar->file.e_atime,
1413 		    extra_data_size);
1414 		archive_entry_set_atime(e, rar->file.e_atime, 0);
1415 	}
1416 
1417 	if(flags & HAS_UNIX_NS) {
1418 		if(!read_u32(a, &rar->file.e_unix_ns))
1419 			return ARCHIVE_EOF;
1420 
1421 		*extra_data_size -= 4;
1422 	}
1423 
1424 	return ARCHIVE_OK;
1425 }
1426 
parse_file_extra_redir(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,ssize_t * extra_data_size)1427 static int parse_file_extra_redir(struct archive_read* a,
1428     struct archive_entry* e, struct rar5* rar, ssize_t* extra_data_size)
1429 {
1430 	uint64_t value_size = 0;
1431 	size_t target_size = 0;
1432 	char target_utf8_buf[MAX_NAME_IN_BYTES];
1433 	const uint8_t* p;
1434 
1435 	if(!read_var(a, &rar->file.redir_type, &value_size))
1436 		return ARCHIVE_EOF;
1437 	if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1438 		return ARCHIVE_EOF;
1439 	*extra_data_size -= value_size;
1440 
1441 	if(!read_var(a, &rar->file.redir_flags, &value_size))
1442 		return ARCHIVE_EOF;
1443 	if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1444 		return ARCHIVE_EOF;
1445 	*extra_data_size -= value_size;
1446 
1447 	if(!read_var_sized(a, &target_size, NULL))
1448 		return ARCHIVE_EOF;
1449 	*extra_data_size -= target_size + 1;
1450 
1451 	if(!read_ahead(a, target_size, &p))
1452 		return ARCHIVE_EOF;
1453 
1454 	if(target_size > (MAX_NAME_IN_CHARS - 1)) {
1455 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1456 		    "Link target is too long");
1457 		return ARCHIVE_FATAL;
1458 	}
1459 
1460 	if(target_size == 0) {
1461 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1462 		    "No link target specified");
1463 		return ARCHIVE_FATAL;
1464 	}
1465 
1466 	memcpy(target_utf8_buf, p, target_size);
1467 	target_utf8_buf[target_size] = 0;
1468 
1469 	if(ARCHIVE_OK != consume(a, (int64_t)target_size))
1470 		return ARCHIVE_EOF;
1471 
1472 	switch(rar->file.redir_type) {
1473 		case REDIR_TYPE_UNIXSYMLINK:
1474 		case REDIR_TYPE_WINSYMLINK:
1475 			archive_entry_set_filetype(e, AE_IFLNK);
1476 			archive_entry_update_symlink_utf8(e, target_utf8_buf);
1477 			if (rar->file.redir_flags & REDIR_SYMLINK_IS_DIR) {
1478 				archive_entry_set_symlink_type(e,
1479 					AE_SYMLINK_TYPE_DIRECTORY);
1480 			} else {
1481 				archive_entry_set_symlink_type(e,
1482 				AE_SYMLINK_TYPE_FILE);
1483 			}
1484 			break;
1485 
1486 		case REDIR_TYPE_HARDLINK:
1487 			archive_entry_set_filetype(e, AE_IFREG);
1488 			archive_entry_update_hardlink_utf8(e, target_utf8_buf);
1489 			break;
1490 
1491 		default:
1492 			/* Unknown redir type, skip it. */
1493 			break;
1494 	}
1495 	return ARCHIVE_OK;
1496 }
1497 
parse_file_extra_owner(struct archive_read * a,struct archive_entry * e,ssize_t * extra_data_size)1498 static int parse_file_extra_owner(struct archive_read* a,
1499     struct archive_entry* e, ssize_t* extra_data_size)
1500 {
1501 	uint64_t flags = 0;
1502 	uint64_t value_size = 0;
1503 	uint64_t id = 0;
1504 	size_t name_len = 0;
1505 	size_t name_size = 0;
1506 	char namebuf[OWNER_MAXNAMELEN];
1507 	const uint8_t* p;
1508 
1509 	if(!read_var(a, &flags, &value_size))
1510 		return ARCHIVE_EOF;
1511 	if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1512 		return ARCHIVE_EOF;
1513 	*extra_data_size -= value_size;
1514 
1515 	if ((flags & OWNER_USER_NAME) != 0) {
1516 		if(!read_var_sized(a, &name_size, NULL))
1517 			return ARCHIVE_EOF;
1518 		*extra_data_size -= name_size + 1;
1519 
1520 		if(!read_ahead(a, name_size, &p))
1521 			return ARCHIVE_EOF;
1522 
1523 		if (name_size >= OWNER_MAXNAMELEN) {
1524 			name_len = OWNER_MAXNAMELEN - 1;
1525 		} else {
1526 			name_len = name_size;
1527 		}
1528 
1529 		memcpy(namebuf, p, name_len);
1530 		namebuf[name_len] = 0;
1531 		if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1532 			return ARCHIVE_EOF;
1533 
1534 		archive_entry_set_uname(e, namebuf);
1535 	}
1536 	if ((flags & OWNER_GROUP_NAME) != 0) {
1537 		if(!read_var_sized(a, &name_size, NULL))
1538 			return ARCHIVE_EOF;
1539 		*extra_data_size -= name_size + 1;
1540 
1541 		if(!read_ahead(a, name_size, &p))
1542 			return ARCHIVE_EOF;
1543 
1544 		if (name_size >= OWNER_MAXNAMELEN) {
1545 			name_len = OWNER_MAXNAMELEN - 1;
1546 		} else {
1547 			name_len = name_size;
1548 		}
1549 
1550 		memcpy(namebuf, p, name_len);
1551 		namebuf[name_len] = 0;
1552 		if(ARCHIVE_OK != consume(a, (int64_t)name_size))
1553 			return ARCHIVE_EOF;
1554 
1555 		archive_entry_set_gname(e, namebuf);
1556 	}
1557 	if ((flags & OWNER_USER_UID) != 0) {
1558 		if(!read_var(a, &id, &value_size))
1559 			return ARCHIVE_EOF;
1560 		if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1561 			return ARCHIVE_EOF;
1562 		*extra_data_size -= value_size;
1563 
1564 		archive_entry_set_uid(e, (la_int64_t)id);
1565 	}
1566 	if ((flags & OWNER_GROUP_GID) != 0) {
1567 		if(!read_var(a, &id, &value_size))
1568 			return ARCHIVE_EOF;
1569 		if(ARCHIVE_OK != consume(a, (int64_t)value_size))
1570 			return ARCHIVE_EOF;
1571 		*extra_data_size -= value_size;
1572 
1573 		archive_entry_set_gid(e, (la_int64_t)id);
1574 	}
1575 	return ARCHIVE_OK;
1576 }
1577 
process_head_file_extra(struct archive_read * a,struct archive_entry * e,struct rar5 * rar,ssize_t extra_data_size)1578 static int process_head_file_extra(struct archive_read* a,
1579     struct archive_entry* e, struct rar5* rar, ssize_t extra_data_size)
1580 {
1581 	size_t extra_field_size;
1582 	size_t extra_field_id = 0;
1583 	int ret = ARCHIVE_FATAL;
1584 	size_t var_size;
1585 
1586 	while(extra_data_size > 0) {
1587 		if(!read_var_sized(a, &extra_field_size, &var_size))
1588 			return ARCHIVE_EOF;
1589 
1590 		extra_data_size -= var_size;
1591 		if(ARCHIVE_OK != consume(a, var_size)) {
1592 			return ARCHIVE_EOF;
1593 		}
1594 
1595 		if(!read_var_sized(a, &extra_field_id, &var_size))
1596 			return ARCHIVE_EOF;
1597 
1598 		extra_data_size -= var_size;
1599 		if(ARCHIVE_OK != consume(a, var_size)) {
1600 			return ARCHIVE_EOF;
1601 		}
1602 
1603 		switch(extra_field_id) {
1604 			case EX_HASH:
1605 				ret = parse_file_extra_hash(a, rar,
1606 				    &extra_data_size);
1607 				break;
1608 			case EX_HTIME:
1609 				ret = parse_file_extra_htime(a, e, rar,
1610 				    &extra_data_size);
1611 				break;
1612 			case EX_REDIR:
1613 				ret = parse_file_extra_redir(a, e, rar,
1614 				    &extra_data_size);
1615 				break;
1616 			case EX_UOWNER:
1617 				ret = parse_file_extra_owner(a, e,
1618 				    &extra_data_size);
1619 				break;
1620 			case EX_VERSION:
1621 				ret = parse_file_extra_version(a, e,
1622 				    &extra_data_size);
1623 				break;
1624 			case EX_CRYPT:
1625 				/* fallthrough */
1626 			case EX_SUBDATA:
1627 				/* fallthrough */
1628 			default:
1629 				/* Skip unsupported entry. */
1630 				return consume(a, extra_data_size);
1631 		}
1632 	}
1633 
1634 	if(ret != ARCHIVE_OK) {
1635 		/* Attribute not implemented. */
1636 		return ret;
1637 	}
1638 
1639 	return ARCHIVE_OK;
1640 }
1641 
process_head_file(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1642 static int process_head_file(struct archive_read* a, struct rar5* rar,
1643     struct archive_entry* entry, size_t block_flags)
1644 {
1645 	ssize_t extra_data_size = 0;
1646 	size_t data_size = 0;
1647 	size_t file_flags = 0;
1648 	size_t file_attr = 0;
1649 	size_t compression_info = 0;
1650 	size_t host_os = 0;
1651 	size_t name_size = 0;
1652 	uint64_t unpacked_size, window_size;
1653 	uint32_t mtime = 0, crc = 0;
1654 	int c_method = 0, c_version = 0;
1655 	char name_utf8_buf[MAX_NAME_IN_BYTES];
1656 	const uint8_t* p;
1657 
1658 	enum FILE_FLAGS {
1659 		DIRECTORY = 0x0001, UTIME = 0x0002, CRC32 = 0x0004,
1660 		UNKNOWN_UNPACKED_SIZE = 0x0008,
1661 	};
1662 
1663 	enum FILE_ATTRS {
1664 		ATTR_READONLY = 0x1, ATTR_HIDDEN = 0x2, ATTR_SYSTEM = 0x4,
1665 		ATTR_DIRECTORY = 0x10,
1666 	};
1667 
1668 	enum COMP_INFO_FLAGS {
1669 		SOLID = 0x0040,
1670 	};
1671 
1672 	enum HOST_OS {
1673 		HOST_WINDOWS = 0,
1674 		HOST_UNIX = 1,
1675 	};
1676 
1677 	archive_entry_clear(entry);
1678 
1679 	/* Do not reset file context if we're switching archives. */
1680 	if(!rar->cstate.switch_multivolume) {
1681 		reset_file_context(rar);
1682 	}
1683 
1684 	if(block_flags & HFL_EXTRA_DATA) {
1685 		size_t edata_size = 0;
1686 		if(!read_var_sized(a, &edata_size, NULL))
1687 			return ARCHIVE_EOF;
1688 
1689 		/* Intentional type cast from unsigned to signed. */
1690 		extra_data_size = (ssize_t) edata_size;
1691 	}
1692 
1693 	if(block_flags & HFL_DATA) {
1694 		if(!read_var_sized(a, &data_size, NULL))
1695 			return ARCHIVE_EOF;
1696 
1697 		rar->file.bytes_remaining = data_size;
1698 	} else {
1699 		rar->file.bytes_remaining = 0;
1700 
1701 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1702 				"no data found in file/service block");
1703 		return ARCHIVE_FATAL;
1704 	}
1705 
1706 	if(!read_var_sized(a, &file_flags, NULL))
1707 		return ARCHIVE_EOF;
1708 
1709 	if(!read_var(a, &unpacked_size, NULL))
1710 		return ARCHIVE_EOF;
1711 
1712 	if(file_flags & UNKNOWN_UNPACKED_SIZE) {
1713 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1714 		    "Files with unknown unpacked size are not supported");
1715 		return ARCHIVE_FATAL;
1716 	}
1717 
1718 	rar->file.dir = (uint8_t) ((file_flags & DIRECTORY) > 0);
1719 
1720 	if(!read_var_sized(a, &file_attr, NULL))
1721 		return ARCHIVE_EOF;
1722 
1723 	if(file_flags & UTIME) {
1724 		if(!read_u32(a, &mtime))
1725 			return ARCHIVE_EOF;
1726 	}
1727 
1728 	if(file_flags & CRC32) {
1729 		if(!read_u32(a, &crc))
1730 			return ARCHIVE_EOF;
1731 	}
1732 
1733 	if(!read_var_sized(a, &compression_info, NULL))
1734 		return ARCHIVE_EOF;
1735 
1736 	c_method = (int) (compression_info >> 7) & 0x7;
1737 	c_version = (int) (compression_info & 0x3f);
1738 
1739 	/* RAR5 seems to limit the dictionary size to 64MB. */
1740 	window_size = (rar->file.dir > 0) ?
1741 		0 :
1742 		g_unpack_window_size << ((compression_info >> 10) & 15);
1743 	rar->cstate.method = c_method;
1744 	rar->cstate.version = c_version + 50;
1745 	rar->file.solid = (compression_info & SOLID) > 0;
1746 
1747 	/* Archives which declare solid files without initializing the window
1748 	 * buffer first are invalid. */
1749 
1750 	if(rar->file.solid > 0 && rar->cstate.window_buf == NULL) {
1751 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1752 				  "Declared solid file, but no window buffer "
1753 				  "initialized yet.");
1754 		return ARCHIVE_FATAL;
1755 	}
1756 
1757 	/* Check if window_size is a sane value. Also, if the file is not
1758 	 * declared as a directory, disallow window_size == 0. */
1759 	if(window_size > (64 * 1024 * 1024) ||
1760 	    (rar->file.dir == 0 && window_size == 0))
1761 	{
1762 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1763 		    "Declared dictionary size is not supported.");
1764 		return ARCHIVE_FATAL;
1765 	}
1766 
1767 	if(rar->file.solid > 0) {
1768 		/* Re-check if current window size is the same as previous
1769 		 * window size (for solid files only). */
1770 		if(rar->file.solid_window_size > 0 &&
1771 		    rar->file.solid_window_size != (ssize_t) window_size)
1772 		{
1773 			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1774 			    "Window size for this solid file doesn't match "
1775 			    "the window size used in previous solid file. ");
1776 			return ARCHIVE_FATAL;
1777 		}
1778 	}
1779 
1780 	if(rar->cstate.window_size < (ssize_t) window_size &&
1781 	    rar->cstate.window_buf)
1782 	{
1783 		/* If window_buf has been allocated before, reallocate it, so
1784 		 * that its size will match new window_size. */
1785 
1786 		uint8_t* new_window_buf =
1787 			realloc(rar->cstate.window_buf, window_size);
1788 
1789 		if(!new_window_buf) {
1790 			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1791 				"Not enough memory when trying to realloc the window "
1792 				"buffer.");
1793 			return ARCHIVE_FATAL;
1794 		}
1795 
1796 		rar->cstate.window_buf = new_window_buf;
1797 	}
1798 
1799 	/* Values up to 64M should fit into ssize_t on every
1800 	 * architecture. */
1801 	rar->cstate.window_size = (ssize_t) window_size;
1802 
1803 	if(rar->file.solid > 0 && rar->file.solid_window_size == 0) {
1804 		/* Solid files have to have the same window_size across
1805 		   whole archive. Remember the window_size parameter
1806 		   for first solid file found. */
1807 		rar->file.solid_window_size = rar->cstate.window_size;
1808 	}
1809 
1810 	init_window_mask(rar);
1811 
1812 	rar->file.service = 0;
1813 
1814 	if(!read_var_sized(a, &host_os, NULL))
1815 		return ARCHIVE_EOF;
1816 
1817 	if(host_os == HOST_WINDOWS) {
1818 		/* Host OS is Windows */
1819 
1820 		__LA_MODE_T mode;
1821 
1822 		if(file_attr & ATTR_DIRECTORY) {
1823 			if (file_attr & ATTR_READONLY) {
1824 				mode = 0555 | AE_IFDIR;
1825 			} else {
1826 				mode = 0755 | AE_IFDIR;
1827 			}
1828 		} else {
1829 			if (file_attr & ATTR_READONLY) {
1830 				mode = 0444 | AE_IFREG;
1831 			} else {
1832 				mode = 0644 | AE_IFREG;
1833 			}
1834 		}
1835 
1836 		archive_entry_set_mode(entry, mode);
1837 
1838 		if (file_attr & (ATTR_READONLY | ATTR_HIDDEN | ATTR_SYSTEM)) {
1839 			char *fflags_text, *ptr;
1840 			/* allocate for "rdonly,hidden,system," */
1841 			fflags_text = malloc(22 * sizeof(char));
1842 			if (fflags_text != NULL) {
1843 				ptr = fflags_text;
1844 				if (file_attr & ATTR_READONLY) {
1845 					strcpy(ptr, "rdonly,");
1846 					ptr = ptr + 7;
1847 				}
1848 				if (file_attr & ATTR_HIDDEN) {
1849 					strcpy(ptr, "hidden,");
1850 					ptr = ptr + 7;
1851 				}
1852 				if (file_attr & ATTR_SYSTEM) {
1853 					strcpy(ptr, "system,");
1854 					ptr = ptr + 7;
1855 				}
1856 				if (ptr > fflags_text) {
1857 					/* Delete trailing comma */
1858 					*(ptr - 1) = '\0';
1859 					archive_entry_copy_fflags_text(entry,
1860 					    fflags_text);
1861 				}
1862 				free(fflags_text);
1863 			}
1864 		}
1865 	} else if(host_os == HOST_UNIX) {
1866 		/* Host OS is Unix */
1867 		archive_entry_set_mode(entry, (__LA_MODE_T) file_attr);
1868 	} else {
1869 		/* Unknown host OS */
1870 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1871 				"Unsupported Host OS: 0x%x", (int) host_os);
1872 
1873 		return ARCHIVE_FATAL;
1874 	}
1875 
1876 	if(!read_var_sized(a, &name_size, NULL))
1877 		return ARCHIVE_EOF;
1878 
1879 	if(!read_ahead(a, name_size, &p))
1880 		return ARCHIVE_EOF;
1881 
1882 	if(name_size > (MAX_NAME_IN_CHARS - 1)) {
1883 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1884 				"Filename is too long");
1885 
1886 		return ARCHIVE_FATAL;
1887 	}
1888 
1889 	if(name_size == 0) {
1890 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1891 				"No filename specified");
1892 
1893 		return ARCHIVE_FATAL;
1894 	}
1895 
1896 	memcpy(name_utf8_buf, p, name_size);
1897 	name_utf8_buf[name_size] = 0;
1898 	if(ARCHIVE_OK != consume(a, name_size)) {
1899 		return ARCHIVE_EOF;
1900 	}
1901 
1902 	archive_entry_update_pathname_utf8(entry, name_utf8_buf);
1903 
1904 	if(extra_data_size > 0) {
1905 		int ret = process_head_file_extra(a, entry, rar,
1906 		    extra_data_size);
1907 
1908 		/*
1909 		 * TODO: rewrite or remove useless sanity check
1910 		 *       as extra_data_size is not passed as a pointer
1911 		 *
1912 		if(extra_data_size < 0) {
1913 			archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
1914 			    "File extra data size is not zero");
1915 			return ARCHIVE_FATAL;
1916 		}
1917 		 */
1918 
1919 		if(ret != ARCHIVE_OK)
1920 			return ret;
1921 	}
1922 
1923 	if((file_flags & UNKNOWN_UNPACKED_SIZE) == 0) {
1924 		rar->file.unpacked_size = (ssize_t) unpacked_size;
1925 		if(rar->file.redir_type == REDIR_TYPE_NONE)
1926 			archive_entry_set_size(entry, unpacked_size);
1927 	}
1928 
1929 	if(file_flags & UTIME) {
1930 		archive_entry_set_mtime(entry, (time_t) mtime, 0);
1931 	}
1932 
1933 	if(file_flags & CRC32) {
1934 		rar->file.stored_crc32 = crc;
1935 	}
1936 
1937 	if(!rar->cstate.switch_multivolume) {
1938 		/* Do not reinitialize unpacking state if we're switching
1939 		 * archives. */
1940 		rar->cstate.block_parsing_finished = 1;
1941 		rar->cstate.all_filters_applied = 1;
1942 		rar->cstate.initialized = 0;
1943 	}
1944 
1945 	if(rar->generic.split_before > 0) {
1946 		/* If now we're standing on a header that has a 'split before'
1947 		 * mark, it means we're standing on a 'continuation' file
1948 		 * header. Signal the caller that if it wants to move to
1949 		 * another file, it must call rar5_read_header() function
1950 		 * again. */
1951 
1952 		return ARCHIVE_RETRY;
1953 	} else {
1954 		return ARCHIVE_OK;
1955 	}
1956 }
1957 
process_head_service(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1958 static int process_head_service(struct archive_read* a, struct rar5* rar,
1959     struct archive_entry* entry, size_t block_flags)
1960 {
1961 	/* Process this SERVICE block the same way as FILE blocks. */
1962 	int ret = process_head_file(a, rar, entry, block_flags);
1963 	if(ret != ARCHIVE_OK)
1964 		return ret;
1965 
1966 	rar->file.service = 1;
1967 
1968 	/* But skip the data part automatically. It's no use for the user
1969 	 * anyway.  It contains only service data, not even needed to
1970 	 * properly unpack the file. */
1971 	ret = rar5_read_data_skip(a);
1972 	if(ret != ARCHIVE_OK)
1973 		return ret;
1974 
1975 	/* After skipping, try parsing another block automatically. */
1976 	return ARCHIVE_RETRY;
1977 }
1978 
process_head_main(struct archive_read * a,struct rar5 * rar,struct archive_entry * entry,size_t block_flags)1979 static int process_head_main(struct archive_read* a, struct rar5* rar,
1980     struct archive_entry* entry, size_t block_flags)
1981 {
1982 	int ret;
1983 	size_t extra_data_size = 0;
1984 	size_t extra_field_size = 0;
1985 	size_t extra_field_id = 0;
1986 	size_t archive_flags = 0;
1987 
1988 	enum MAIN_FLAGS {
1989 		VOLUME = 0x0001,         /* multi-volume archive */
1990 		VOLUME_NUMBER = 0x0002,  /* volume number, first vol doesn't
1991 					  * have it */
1992 		SOLID = 0x0004,          /* solid archive */
1993 		PROTECT = 0x0008,        /* contains Recovery info */
1994 		LOCK = 0x0010,           /* readonly flag, not used */
1995 	};
1996 
1997 	enum MAIN_EXTRA {
1998 		// Just one attribute here.
1999 		LOCATOR = 0x01,
2000 	};
2001 
2002 	(void) entry;
2003 
2004 	if(block_flags & HFL_EXTRA_DATA) {
2005 		if(!read_var_sized(a, &extra_data_size, NULL))
2006 			return ARCHIVE_EOF;
2007 	} else {
2008 		extra_data_size = 0;
2009 	}
2010 
2011 	if(!read_var_sized(a, &archive_flags, NULL)) {
2012 		return ARCHIVE_EOF;
2013 	}
2014 
2015 	rar->main.volume = (archive_flags & VOLUME) > 0;
2016 	rar->main.solid = (archive_flags & SOLID) > 0;
2017 
2018 	if(archive_flags & VOLUME_NUMBER) {
2019 		size_t v = 0;
2020 		if(!read_var_sized(a, &v, NULL)) {
2021 			return ARCHIVE_EOF;
2022 		}
2023 
2024 		if (v > UINT_MAX) {
2025 			archive_set_error(&a->archive,
2026 			    ARCHIVE_ERRNO_FILE_FORMAT,
2027 			    "Invalid volume number");
2028 			return ARCHIVE_FATAL;
2029 		}
2030 
2031 		rar->main.vol_no = (unsigned int) v;
2032 	} else {
2033 		rar->main.vol_no = 0;
2034 	}
2035 
2036 	if(rar->vol.expected_vol_no > 0 &&
2037 		rar->main.vol_no != rar->vol.expected_vol_no)
2038 	{
2039 		/* Returning EOF instead of FATAL because of strange
2040 		 * libarchive behavior. When opening multiple files via
2041 		 * archive_read_open_filenames(), after reading up the whole
2042 		 * last file, the __archive_read_ahead function wraps up to
2043 		 * the first archive instead of returning EOF. */
2044 		return ARCHIVE_EOF;
2045 	}
2046 
2047 	if(extra_data_size == 0) {
2048 		/* Early return. */
2049 		return ARCHIVE_OK;
2050 	}
2051 
2052 	if(!read_var_sized(a, &extra_field_size, NULL)) {
2053 		return ARCHIVE_EOF;
2054 	}
2055 
2056 	if(!read_var_sized(a, &extra_field_id, NULL)) {
2057 		return ARCHIVE_EOF;
2058 	}
2059 
2060 	if(extra_field_size == 0) {
2061 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2062 		    "Invalid extra field size");
2063 		return ARCHIVE_FATAL;
2064 	}
2065 
2066 	switch(extra_field_id) {
2067 		case LOCATOR:
2068 			ret = process_main_locator_extra_block(a, rar);
2069 			if(ret != ARCHIVE_OK) {
2070 				/* Error while parsing main locator extra
2071 				 * block. */
2072 				return ret;
2073 			}
2074 
2075 			break;
2076 		default:
2077 			archive_set_error(&a->archive,
2078 			    ARCHIVE_ERRNO_FILE_FORMAT,
2079 			    "Unsupported extra type (0x%x)",
2080 			    (int) extra_field_id);
2081 			return ARCHIVE_FATAL;
2082 	}
2083 
2084 	return ARCHIVE_OK;
2085 }
2086 
skip_unprocessed_bytes(struct archive_read * a)2087 static int skip_unprocessed_bytes(struct archive_read* a) {
2088 	struct rar5* rar = get_context(a);
2089 	int ret;
2090 
2091 	if(rar->file.bytes_remaining) {
2092 		/* Use different skipping method in block merging mode than in
2093 		 * normal mode. If merge mode is active, rar5_read_data_skip
2094 		 * can't be used, because it could allow recursive use of
2095 		 * merge_block() * function, and this function doesn't support
2096 		 * recursive use. */
2097 		if(rar->merge_mode) {
2098 			/* Discard whole merged block. This is valid in solid
2099 			 * mode as well, because the code will discard blocks
2100 			 * only if those blocks are safe to discard (i.e.
2101 			 * they're not FILE blocks).  */
2102 			ret = consume(a, rar->file.bytes_remaining);
2103 			if(ret != ARCHIVE_OK) {
2104 				return ret;
2105 			}
2106 			rar->file.bytes_remaining = 0;
2107 		} else {
2108 			/* If we're not in merge mode, use safe skipping code.
2109 			 * This will ensure we'll handle solid archives
2110 			 * properly. */
2111 			ret = rar5_read_data_skip(a);
2112 			if(ret != ARCHIVE_OK) {
2113 				return ret;
2114 			}
2115 		}
2116 	}
2117 
2118 	return ARCHIVE_OK;
2119 }
2120 
2121 static int scan_for_signature(struct archive_read* a);
2122 
2123 /* Base block processing function. A 'base block' is a RARv5 header block
2124  * that tells the reader what kind of data is stored inside the block.
2125  *
 * From the birds-eye view a RAR file looks like this:
2127  *
2128  * <magic><base_block_1><base_block_2>...<base_block_n>
2129  *
2130  * There are a few types of base blocks. Those types are specified inside
2131  * the 'switch' statement in this function. For example purposes, I'll write
2132  * how a standard RARv5 file could look like here:
2133  *
2134  * <magic><MAIN><FILE><FILE><FILE><SERVICE><ENDARC>
2135  *
2136  * The structure above could describe an archive file with 3 files in it,
2137  * one service "QuickOpen" block (that is ignored by this parser), and an
2138  * end of file base block marker.
2139  *
2140  * If the file is stored in multiple archive files ("multiarchive"), it might
2141  * look like this:
2142  *
2143  * .part01.rar: <magic><MAIN><FILE><ENDARC>
2144  * .part02.rar: <magic><MAIN><FILE><ENDARC>
2145  * .part03.rar: <magic><MAIN><FILE><ENDARC>
2146  *
2147  * This example could describe 3 RAR files that contain ONE archived file.
2148  * Or it could describe 3 RAR files that contain 3 different files. Or 3
 * RAR files that contain 2 files. It all depends what metadata is stored in
2150  * the headers of <FILE> blocks.
2151  *
2152  * Each <FILE> block contains info about its size, the name of the file it's
2153  * storing inside, and whether this FILE block is a continuation block of
 * previous archive ('split before'), and whether this FILE block should be
2155  * continued in another archive ('split after'). By parsing the 'split before'
2156  * and 'split after' flags, we're able to tell if multiple <FILE> base blocks
2157  * are describing one file, or multiple files (with the same filename, for
2158  * example).
2159  *
2160  * One thing to note is that if we're parsing the first <FILE> block, and
2161  * we see 'split after' flag, then we need to jump over to another <FILE>
2162  * block to be able to decompress rest of the data. To do this, we need
2163  * to skip the <ENDARC> block, then switch to another file, then skip the
2164  * <magic> block, <MAIN> block, and then we're standing on the proper
2165  * <FILE> block.
2166  */
2167 
process_base_block(struct archive_read * a,struct archive_entry * entry)2168 static int process_base_block(struct archive_read* a,
2169     struct archive_entry* entry)
2170 {
2171 	const size_t SMALLEST_RAR5_BLOCK_SIZE = 3;
2172 
2173 	struct rar5* rar = get_context(a);
2174 	uint32_t hdr_crc, computed_crc;
2175 	size_t raw_hdr_size = 0, hdr_size_len, hdr_size;
2176 	size_t header_id = 0;
2177 	size_t header_flags = 0;
2178 	const uint8_t* p;
2179 	int ret;
2180 
2181 	enum HEADER_TYPE {
2182 		HEAD_MARK    = 0x00, HEAD_MAIN  = 0x01, HEAD_FILE   = 0x02,
2183 		HEAD_SERVICE = 0x03, HEAD_CRYPT = 0x04, HEAD_ENDARC = 0x05,
2184 		HEAD_UNKNOWN = 0xff,
2185 	};
2186 
2187 	/* Skip any unprocessed data for this file. */
2188 	ret = skip_unprocessed_bytes(a);
2189 	if(ret != ARCHIVE_OK)
2190 		return ret;
2191 
2192 	/* Read the expected CRC32 checksum. */
2193 	if(!read_u32(a, &hdr_crc)) {
2194 		return ARCHIVE_EOF;
2195 	}
2196 
2197 	/* Read header size. */
2198 	if(!read_var_sized(a, &raw_hdr_size, &hdr_size_len)) {
2199 		return ARCHIVE_EOF;
2200 	}
2201 
2202 	hdr_size = raw_hdr_size + hdr_size_len;
2203 
2204 	/* Sanity check, maximum header size for RAR5 is 2MB. */
2205 	if(hdr_size > (2 * 1024 * 1024)) {
2206 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2207 		    "Base block header is too large");
2208 
2209 		return ARCHIVE_FATAL;
2210 	}
2211 
2212 	/* Additional sanity checks to weed out invalid files. */
2213 	if(raw_hdr_size == 0 || hdr_size_len == 0 ||
2214 		hdr_size < SMALLEST_RAR5_BLOCK_SIZE)
2215 	{
2216 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2217 		    "Too small block encountered (%zu bytes)",
2218 		    raw_hdr_size);
2219 
2220 		return ARCHIVE_FATAL;
2221 	}
2222 
2223 	/* Read the whole header data into memory, maximum memory use here is
2224 	 * 2MB. */
2225 	if(!read_ahead(a, hdr_size, &p)) {
2226 		return ARCHIVE_EOF;
2227 	}
2228 
2229 	/* Verify the CRC32 of the header data. */
2230 	computed_crc = (uint32_t) crc32(0, p, (int) hdr_size);
2231 	if(computed_crc != hdr_crc) {
2232 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2233 		    "Header CRC error");
2234 
2235 		return ARCHIVE_FATAL;
2236 	}
2237 
2238 	/* If the checksum is OK, we proceed with parsing. */
2239 	if(ARCHIVE_OK != consume(a, hdr_size_len)) {
2240 		return ARCHIVE_EOF;
2241 	}
2242 
2243 	if(!read_var_sized(a, &header_id, NULL))
2244 		return ARCHIVE_EOF;
2245 
2246 	if(!read_var_sized(a, &header_flags, NULL))
2247 		return ARCHIVE_EOF;
2248 
2249 	rar->generic.split_after = (header_flags & HFL_SPLIT_AFTER) > 0;
2250 	rar->generic.split_before = (header_flags & HFL_SPLIT_BEFORE) > 0;
2251 	rar->generic.size = (int)hdr_size;
2252 	rar->generic.last_header_id = (int)header_id;
2253 	rar->main.endarc = 0;
2254 
2255 	/* Those are possible header ids in RARv5. */
2256 	switch(header_id) {
2257 		case HEAD_MAIN:
2258 			ret = process_head_main(a, rar, entry, header_flags);
2259 
2260 			/* Main header doesn't have any files in it, so it's
2261 			 * pointless to return to the caller. Retry to next
2262 			 * header, which should be HEAD_FILE/HEAD_SERVICE. */
2263 			if(ret == ARCHIVE_OK)
2264 				return ARCHIVE_RETRY;
2265 
2266 			return ret;
2267 		case HEAD_SERVICE:
2268 			ret = process_head_service(a, rar, entry, header_flags);
2269 			return ret;
2270 		case HEAD_FILE:
2271 			ret = process_head_file(a, rar, entry, header_flags);
2272 			return ret;
2273 		case HEAD_CRYPT:
2274 			archive_set_error(&a->archive,
2275 			    ARCHIVE_ERRNO_FILE_FORMAT,
2276 			    "Encryption is not supported");
2277 			return ARCHIVE_FATAL;
2278 		case HEAD_ENDARC:
2279 			rar->main.endarc = 1;
2280 
2281 			/* After encountering an end of file marker, we need
2282 			 * to take into consideration if this archive is
2283 			 * continued in another file (i.e. is it part01.rar:
2284 			 * is there a part02.rar?) */
2285 			if(rar->main.volume) {
2286 				/* In case there is part02.rar, position the
2287 				 * read pointer in a proper place, so we can
2288 				 * resume parsing. */
2289 				ret = scan_for_signature(a);
2290 				if(ret == ARCHIVE_FATAL) {
2291 					return ARCHIVE_EOF;
2292 				} else {
2293 					if(rar->vol.expected_vol_no ==
2294 					    UINT_MAX) {
2295 						archive_set_error(&a->archive,
2296 						    ARCHIVE_ERRNO_FILE_FORMAT,
2297 						    "Header error");
2298 							return ARCHIVE_FATAL;
2299 					}
2300 
2301 					rar->vol.expected_vol_no =
2302 					    rar->main.vol_no + 1;
2303 					return ARCHIVE_OK;
2304 				}
2305 			} else {
2306 				return ARCHIVE_EOF;
2307 			}
2308 		case HEAD_MARK:
2309 			return ARCHIVE_EOF;
2310 		default:
2311 			if((header_flags & HFL_SKIP_IF_UNKNOWN) == 0) {
2312 				archive_set_error(&a->archive,
2313 				    ARCHIVE_ERRNO_FILE_FORMAT,
2314 				    "Header type error");
2315 				return ARCHIVE_FATAL;
2316 			} else {
2317 				/* If the block is marked as 'skip if unknown',
2318 				 * do as the flag says: skip the block
2319 				 * instead on failing on it. */
2320 				return ARCHIVE_RETRY;
2321 			}
2322 	}
2323 
2324 #if !defined WIN32
2325 	// Not reached.
2326 	archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
2327 	    "Internal unpacker error");
2328 	return ARCHIVE_FATAL;
2329 #endif
2330 }
2331 
skip_base_block(struct archive_read * a)2332 static int skip_base_block(struct archive_read* a) {
2333 	int ret;
2334 	struct rar5* rar = get_context(a);
2335 
2336 	/* Create a new local archive_entry structure that will be operated on
2337 	 * by header reader; operations on this archive_entry will be discarded.
2338 	 */
2339 	struct archive_entry* entry = archive_entry_new();
2340 	ret = process_base_block(a, entry);
2341 
2342 	/* Discard operations on this archive_entry structure. */
2343 	archive_entry_free(entry);
2344 	if(ret == ARCHIVE_FATAL)
2345 		return ret;
2346 
2347 	if(rar->generic.last_header_id == 2 && rar->generic.split_before > 0)
2348 		return ARCHIVE_OK;
2349 
2350 	if(ret == ARCHIVE_OK)
2351 		return ARCHIVE_RETRY;
2352 	else
2353 		return ret;
2354 }
2355 
try_skip_sfx(struct archive_read * a)2356 static int try_skip_sfx(struct archive_read *a)
2357 {
2358 	const char *p;
2359 
2360 	if ((p = __archive_read_ahead(a, 7, NULL)) == NULL)
2361 		return ARCHIVE_EOF;
2362 
2363 	if ((p[0] == 'M' && p[1] == 'Z') || memcmp(p, "\x7F\x45LF", 4) == 0)
2364 	{
2365 		char signature[sizeof(rar5_signature_xor)];
2366 		const void *h;
2367 		const char *q;
2368 		size_t skip, total = 0;
2369 		ssize_t bytes, window = 4096;
2370 
2371 		rar5_signature(signature);
2372 
2373 		while (total + window <= (1024 * 512)) {
2374 			h = __archive_read_ahead(a, window, &bytes);
2375 			if (h == NULL) {
2376 				/* Remaining bytes are less than window. */
2377 				window >>= 1;
2378 				if (window < 0x40)
2379 					goto fatal;
2380 				continue;
2381 			}
2382 			if (bytes < 0x40)
2383 				goto fatal;
2384 			p = h;
2385 			q = p + bytes;
2386 
2387 			/*
2388 			 * Scan ahead until we find something that looks
2389 			 * like the RAR header.
2390 			 */
2391 			while (p + 8 < q) {
2392 				if (memcmp(p, signature, sizeof(signature)) == 0) {
2393 					skip = p - (const char *)h;
2394 					__archive_read_consume(a, skip);
2395 					return (ARCHIVE_OK);
2396 				}
2397 				p += 0x10;
2398 			}
2399 			skip = p - (const char *)h;
2400 			__archive_read_consume(a, skip);
2401 			total += skip;
2402 		}
2403 	}
2404 
2405 	return ARCHIVE_OK;
2406 fatal:
2407 	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2408 			"Couldn't find out RAR header");
2409 	return (ARCHIVE_FATAL);
2410 }
2411 
rar5_read_header(struct archive_read * a,struct archive_entry * entry)2412 static int rar5_read_header(struct archive_read *a,
2413     struct archive_entry *entry)
2414 {
2415 	struct rar5* rar = get_context(a);
2416 	int ret;
2417 
2418 	if(rar->header_initialized == 0) {
2419 		init_header(a);
2420 		if ((ret = try_skip_sfx(a)) < ARCHIVE_WARN)
2421 			return ret;
2422 		rar->header_initialized = 1;
2423 	}
2424 
2425 	if(rar->skipped_magic == 0) {
2426 		if(ARCHIVE_OK != consume(a, sizeof(rar5_signature_xor))) {
2427 			return ARCHIVE_EOF;
2428 		}
2429 
2430 		rar->skipped_magic = 1;
2431 	}
2432 
2433 	do {
2434 		ret = process_base_block(a, entry);
2435 	} while(ret == ARCHIVE_RETRY ||
2436 			(rar->main.endarc > 0 && ret == ARCHIVE_OK));
2437 
2438 	return ret;
2439 }
2440 
init_unpack(struct rar5 * rar)2441 static void init_unpack(struct rar5* rar) {
2442 	rar->file.calculated_crc32 = 0;
2443 	init_window_mask(rar);
2444 
2445 	free(rar->cstate.window_buf);
2446 	free(rar->cstate.filtered_buf);
2447 
2448 	if(rar->cstate.window_size > 0) {
2449 		rar->cstate.window_buf = calloc(1, rar->cstate.window_size);
2450 		rar->cstate.filtered_buf = calloc(1, rar->cstate.window_size);
2451 	} else {
2452 		rar->cstate.window_buf = NULL;
2453 		rar->cstate.filtered_buf = NULL;
2454 	}
2455 
2456 	rar->cstate.write_ptr = 0;
2457 	rar->cstate.last_write_ptr = 0;
2458 
2459 	memset(&rar->cstate.bd, 0, sizeof(rar->cstate.bd));
2460 	memset(&rar->cstate.ld, 0, sizeof(rar->cstate.ld));
2461 	memset(&rar->cstate.dd, 0, sizeof(rar->cstate.dd));
2462 	memset(&rar->cstate.ldd, 0, sizeof(rar->cstate.ldd));
2463 	memset(&rar->cstate.rd, 0, sizeof(rar->cstate.rd));
2464 }
2465 
update_crc(struct rar5 * rar,const uint8_t * p,size_t to_read)2466 static void update_crc(struct rar5* rar, const uint8_t* p, size_t to_read) {
2467     int verify_crc;
2468 
2469 	if(rar->skip_mode) {
2470 #if defined CHECK_CRC_ON_SOLID_SKIP
2471 		verify_crc = 1;
2472 #else
2473 		verify_crc = 0;
2474 #endif
2475 	} else
2476 		verify_crc = 1;
2477 
2478 	if(verify_crc) {
2479 		/* Don't update CRC32 if the file doesn't have the
2480 		 * `stored_crc32` info filled in. */
2481 		if(rar->file.stored_crc32 > 0) {
2482 			rar->file.calculated_crc32 =
2483 				crc32(rar->file.calculated_crc32, p, (unsigned int)to_read);
2484 		}
2485 
2486 		/* Check if the file uses an optional BLAKE2sp checksum
2487 		 * algorithm. */
2488 		if(rar->file.has_blake2 > 0) {
2489 			/* Return value of the `update` function is always 0,
2490 			 * so we can explicitly ignore it here. */
2491 			(void) blake2sp_update(&rar->file.b2state, p, to_read);
2492 		}
2493 	}
2494 }
2495 
/* Builds the Huffman decoding structures in `table` from `size` code
 * lengths stored in `bit_length` (only the low 4 bits of each length are
 * used).
 *
 * The function fills `decode_len`, `decode_pos` and `decode_num`, plus the
 * `quick_len`/`quick_num` lookup arrays indexed by the first `quick_bits`
 * bits of the input. It always returns ARCHIVE_OK. */
static int create_decode_tables(uint8_t* bit_length,
    struct decode_table* table, int size)
{
	int code, upper_limit = 0, i, lc[16];
	uint32_t decode_pos_clone[rar5_countof(table->decode_pos)];
	ssize_t cur_len, quick_data_size;

	memset(&lc, 0, sizeof(lc));
	memset(table->decode_num, 0, sizeof(table->decode_num));
	table->size = size;
	/* The HUFF_NC-sized table gets a 10-bit quick lookup, every other
	 * table a 7-bit one. */
	table->quick_bits = size == HUFF_NC ? 10 : 7;

	/* Count how many symbols use each code length (0..15). */
	for(i = 0; i < size; i++) {
		lc[bit_length[i] & 15]++;
	}

	/* Length 0 is excluded from the computations below. */
	lc[0] = 0;
	table->decode_pos[0] = 0;
	table->decode_len[0] = 0;

	for(i = 1; i < 16; i++) {
		upper_limit += lc[i];

		/* decode_len[i]: the running limit for codes of length `i`,
		 * shifted left so it can be compared with a 16-bit field.
		 * decode_pos[i]: number of symbols with a shorter (non-zero)
		 * code length, i.e. the index in decode_num where symbols of
		 * length `i` start. */
		table->decode_len[i] = upper_limit << (16 - i);
		table->decode_pos[i] = table->decode_pos[i - 1] + lc[i - 1];

		upper_limit <<= 1;
	}

	/* Work on a copy of the start positions, because the loop below
	 * advances them while placing symbols. */
	memcpy(decode_pos_clone, table->decode_pos, sizeof(decode_pos_clone));

	/* Store the symbols in decode_num, grouped by code length and in
	 * increasing symbol order inside each group. */
	for(i = 0; i < size; i++) {
		uint8_t clen = bit_length[i] & 15;
		if(clen > 0) {
			int last_pos = decode_pos_clone[clen];
			table->decode_num[last_pos] = i;
			decode_pos_clone[clen]++;
		}
	}

	/* Precompute code length and symbol for every possible value of the
	 * first `quick_bits` bits. */
	quick_data_size = (int64_t)1 << table->quick_bits;
	cur_len = 1;
	for(code = 0; code < quick_data_size; code++) {
		/* `code` aligned to the top of a 16-bit field. */
		int bit_field = code << (16 - table->quick_bits);
		int dist, pos;

		/* `cur_len` is never reset: it only grows as `code`
		 * increases. */
		while(cur_len < rar5_countof(table->decode_len) &&
				bit_field >= table->decode_len[cur_len]) {
			cur_len++;
		}

		table->quick_len[code] = (uint8_t) cur_len;

		/* Offset of this code among the codes of length `cur_len`. */
		dist = bit_field - table->decode_len[cur_len - 1];
		dist >>= (16 - cur_len);

		pos = table->decode_pos[cur_len & 15] + dist;
		/* Fall back to symbol 0 when the computed position is not
		 * usable. */
		if(cur_len < rar5_countof(table->decode_pos) && pos < size) {
			table->quick_num[code] = table->decode_num[pos];
		} else {
			table->quick_num[code] = 0;
		}
	}

	return ARCHIVE_OK;
}
2562 
decode_number(struct archive_read * a,struct decode_table * table,const uint8_t * p,uint16_t * num)2563 static int decode_number(struct archive_read* a, struct decode_table* table,
2564     const uint8_t* p, uint16_t* num)
2565 {
2566 	int i, bits, dist, ret;
2567 	uint16_t bitfield;
2568 	uint32_t pos;
2569 	struct rar5* rar = get_context(a);
2570 
2571 	if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &bitfield))) {
2572 		return ret;
2573 	}
2574 
2575 	bitfield &= 0xfffe;
2576 
2577 	if(bitfield < table->decode_len[table->quick_bits]) {
2578 		int code = bitfield >> (16 - table->quick_bits);
2579 		skip_bits(rar, table->quick_len[code]);
2580 		*num = table->quick_num[code];
2581 		return ARCHIVE_OK;
2582 	}
2583 
2584 	bits = 15;
2585 
2586 	for(i = table->quick_bits + 1; i < 15; i++) {
2587 		if(bitfield < table->decode_len[i]) {
2588 			bits = i;
2589 			break;
2590 		}
2591 	}
2592 
2593 	skip_bits(rar, bits);
2594 
2595 	dist = bitfield - table->decode_len[bits - 1];
2596 	dist >>= (16 - bits);
2597 	pos = table->decode_pos[bits] + dist;
2598 
2599 	if(pos >= table->size)
2600 		pos = 0;
2601 
2602 	*num = table->decode_num[pos];
2603 	return ARCHIVE_OK;
2604 }
2605 
/* Reads and parses Huffman tables from the beginning of the block.
 *
 * First the HUFF_BC code lengths of the `bd` table are unpacked from
 * nibbles at `p`. That table is then used to decode HUFF_TABLE_SIZE code
 * lengths, from which the `ld`, `dd`, `ldd` and `rd` decode tables are
 * built.
 *
 * Returns ARCHIVE_OK on success and ARCHIVE_FATAL on truncated or invalid
 * table data. A failing read_bits_16() call inside the decoding loop has
 * its status returned unchanged. */
static int parse_tables(struct archive_read* a, struct rar5* rar,
    const uint8_t* p)
{
	int ret, value, i, w, idx = 0;
	uint8_t bit_length[HUFF_BC],
		table[HUFF_TABLE_SIZE],
		nibble_mask = 0xF0,
		nibble_shift = 4;

	enum { ESCAPE = 15 };

	/* The data for table generation is compressed using a simple RLE-like
	 * algorithm when storing zeroes, so we need to unpack it first.
	 * `i` is the current byte of the input, `w` is the number of lengths
	 * written so far; the high nibble of each byte is read first. */
	for(w = 0, i = 0; w < HUFF_BC;) {
		if(i >= rar->cstate.cur_block_size) {
			/* Truncated data, can't continue. */
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Truncated data in huffman tables");
			return ARCHIVE_FATAL;
		}

		value = (p[i] & nibble_mask) >> nibble_shift;

		/* The low nibble was just read, so move to the next byte. */
		if(nibble_mask == 0x0F)
			++i;

		/* Switch between the high (0xF0, shift 4) and the low (0x0F,
		 * shift 0) nibble. */
		nibble_mask ^= 0xFF;
		nibble_shift ^= 4;

		/* Values smaller than 15 are data, so we write them directly.
		 * Value 15 is a flag telling us that we need to unpack more
		 * bytes. */
		if(value == ESCAPE) {
			/* NOTE(review): `i` may have been advanced just above
			 * and is not re-checked against cur_block_size before
			 * this read -- confirm that the buffer behind `p`
			 * always extends past the block. */
			value = (p[i] & nibble_mask) >> nibble_shift;
			if(nibble_mask == 0x0F)
				++i;
			nibble_mask ^= 0xFF;
			nibble_shift ^= 4;

			if(value == 0) {
				/* We sometimes need to write the actual value
				 * of 15, so this case handles that. */
				bit_length[w++] = ESCAPE;
			} else {
				int k;

				/* Fill `value + 2` zeroes, without running
				 * past the end of the table. */
				for(k = 0; (k < value + 2) && (w < HUFF_BC);
				    k++) {
					bit_length[w++] = 0;
				}
			}
		} else {
			bit_length[w++] = value;
		}
	}

	/* Point the bit reader right after the last consumed nibble: at bit
	 * 4 of byte `i` when a high nibble was consumed last, at bit 0
	 * otherwise. */
	rar->bits.in_addr = i;
	rar->bits.bit_addr = nibble_shift ^ 4;

	ret = create_decode_tables(bit_length, &rar->cstate.bd, HUFF_BC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Decoding huffman tables failed");
		return ARCHIVE_FATAL;
	}

	/* Decode the code lengths of all remaining tables using the `bd`
	 * table built above. */
	for(i = 0; i < HUFF_TABLE_SIZE;) {
		uint16_t num;

		ret = decode_number(a, &rar->cstate.bd, p, &num);
		if(ret != ARCHIVE_OK) {
			archive_set_error(&a->archive,
			    ARCHIVE_ERRNO_FILE_FORMAT,
			    "Decoding huffman tables failed");
			return ARCHIVE_FATAL;
		}

		if(num < 16) {
			/* 0..15: store directly */
			table[i] = (uint8_t) num;
			i++;
		} else if(num < 18) {
			/* 16..17: repeat previous code */
			uint16_t n;

			if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
				return ret;

			/* 16: 3-bit repeat count, biased by 3.
			 * 17: 7-bit repeat count, biased by 11. */
			if(num == 16) {
				n >>= 13;
				n += 3;
				skip_bits(rar, 3);
			} else {
				n >>= 9;
				n += 11;
				skip_bits(rar, 7);
			}

			if(i > 0) {
				while(n-- > 0 && i < HUFF_TABLE_SIZE) {
					table[i] = table[i - 1];
					i++;
				}
			} else {
				/* There is no previous code to repeat. */
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_FILE_FORMAT,
				    "Unexpected error when decoding "
				    "huffman tables");
				return ARCHIVE_FATAL;
			}
		} else {
			/* other codes: fill with zeroes `n` times */
			uint16_t n;

			if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &n)))
				return ret;

			/* 18: 3-bit count, biased by 3.
			 * anything else: 7-bit count, biased by 11. */
			if(num == 18) {
				n >>= 13;
				n += 3;
				skip_bits(rar, 3);
			} else {
				n >>= 9;
				n += 11;
				skip_bits(rar, 7);
			}

			while(n-- > 0 && i < HUFF_TABLE_SIZE)
				table[i++] = 0;
		}
	}

	/* The decoded lengths are laid out back to back: HUFF_NC entries for
	 * `ld`, HUFF_DC for `dd`, HUFF_LDC for `ldd`, HUFF_RC for `rd`. */
	ret = create_decode_tables(&table[idx], &rar->cstate.ld, HUFF_NC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		     "Failed to create literal table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_NC;

	ret = create_decode_tables(&table[idx], &rar->cstate.dd, HUFF_DC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create distance table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_DC;

	ret = create_decode_tables(&table[idx], &rar->cstate.ldd, HUFF_LDC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create lower bits of distances table");
		return ARCHIVE_FATAL;
	}

	idx += HUFF_LDC;

	ret = create_decode_tables(&table[idx], &rar->cstate.rd, HUFF_RC);
	if(ret != ARCHIVE_OK) {
		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
		    "Failed to create repeating distances table");
		return ARCHIVE_FATAL;
	}

	return ARCHIVE_OK;
}
2777 
2778 /* Parses the block header, verifies its CRC byte, and saves the header
2779  * fields inside the `hdr` pointer. */
parse_block_header(struct archive_read * a,const uint8_t * p,ssize_t * block_size,struct compressed_block_header * hdr)2780 static int parse_block_header(struct archive_read* a, const uint8_t* p,
2781     ssize_t* block_size, struct compressed_block_header* hdr)
2782 {
2783 	uint8_t calculated_cksum;
2784 	memcpy(hdr, p, sizeof(struct compressed_block_header));
2785 
2786 	if(bf_byte_count(hdr) > 2) {
2787 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2788 		    "Unsupported block header size (was %d, max is 2)",
2789 		    bf_byte_count(hdr));
2790 		return ARCHIVE_FATAL;
2791 	}
2792 
2793 	/* This should probably use bit reader interface in order to be more
2794 	 * future-proof. */
2795 	*block_size = 0;
2796 	switch(bf_byte_count(hdr)) {
2797 		/* 1-byte block size */
2798 		case 0:
2799 			*block_size = *(const uint8_t*) &p[2];
2800 			break;
2801 
2802 		/* 2-byte block size */
2803 		case 1:
2804 			*block_size = archive_le16dec(&p[2]);
2805 			break;
2806 
2807 		/* 3-byte block size */
2808 		case 2:
2809 			*block_size = archive_le32dec(&p[2]);
2810 			*block_size &= 0x00FFFFFF;
2811 			break;
2812 
2813 		/* Other block sizes are not supported. This case is not
2814 		 * reached, because we have an 'if' guard before the switch
2815 		 * that makes sure of it. */
2816 		default:
2817 			return ARCHIVE_FATAL;
2818 	}
2819 
2820 	/* Verify the block header checksum. 0x5A is a magic value and is
2821 	 * always * constant. */
2822 	calculated_cksum = 0x5A
2823 	    ^ (uint8_t) hdr->block_flags_u8
2824 	    ^ (uint8_t) *block_size
2825 	    ^ (uint8_t) (*block_size >> 8)
2826 	    ^ (uint8_t) (*block_size >> 16);
2827 
2828 	if(calculated_cksum != hdr->block_cksum) {
2829 #ifndef DONT_FAIL_ON_CRC_ERROR
2830 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2831 		    "Block checksum error: got 0x%x, expected 0x%x",
2832 		    hdr->block_cksum, calculated_cksum);
2833 
2834 		return ARCHIVE_FATAL;
2835 #endif
2836 	}
2837 
2838 	return ARCHIVE_OK;
2839 }
2840 
2841 /* Convenience function used during filter processing. */
parse_filter_data(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint32_t * filter_data)2842 static int parse_filter_data(struct archive_read* a, struct rar5* rar,
2843 	const uint8_t* p, uint32_t* filter_data)
2844 {
2845 	int i, bytes, ret;
2846 	uint32_t data = 0;
2847 
2848 	if(ARCHIVE_OK != (ret = read_consume_bits(a, rar, p, 2, &bytes)))
2849 		return ret;
2850 
2851 	bytes++;
2852 
2853 	for(i = 0; i < bytes; i++) {
2854 		uint16_t byte;
2855 
2856 		if(ARCHIVE_OK != (ret = read_bits_16(a, rar, p, &byte))) {
2857 			return ret;
2858 		}
2859 
2860 		/* Cast to uint32_t will ensure the shift operation will not
2861 		 * produce undefined result. */
2862 		data += ((uint32_t) byte >> 8) << (i * 8);
2863 		skip_bits(rar, 8);
2864 	}
2865 
2866 	*filter_data = data;
2867 	return ARCHIVE_OK;
2868 }
2869 
2870 /* Function is used during sanity checking. */
is_valid_filter_block_start(struct rar5 * rar,uint32_t start)2871 static int is_valid_filter_block_start(struct rar5* rar,
2872     uint32_t start)
2873 {
2874 	const int64_t block_start = (ssize_t) start + rar->cstate.write_ptr;
2875 	const int64_t last_bs = rar->cstate.last_block_start;
2876 	const ssize_t last_bl = rar->cstate.last_block_length;
2877 
2878 	if(last_bs == 0 || last_bl == 0) {
2879 		/* We didn't have any filters yet, so accept this offset. */
2880 		return 1;
2881 	}
2882 
2883 	if(block_start >= last_bs + last_bl) {
2884 		/* Current offset is bigger than last block's end offset, so
2885 		 * accept current offset. */
2886 		return 1;
2887 	}
2888 
2889 	/* Any other case is not a normal situation and we should fail. */
2890 	return 0;
2891 }
2892 
2893 /* The function will create a new filter, read its parameters from the input
2894  * stream and add it to the filter collection. */
parse_filter(struct archive_read * ar,const uint8_t * p)2895 static int parse_filter(struct archive_read* ar, const uint8_t* p) {
2896 	uint32_t block_start, block_length;
2897 	uint16_t filter_type;
2898 	struct filter_info* filt = NULL;
2899 	struct rar5* rar = get_context(ar);
2900 	int ret;
2901 
2902 	/* Read the parameters from the input stream. */
2903 	if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_start)))
2904 		return ret;
2905 
2906 	if(ARCHIVE_OK != (ret = parse_filter_data(ar, rar, p, &block_length)))
2907 		return ret;
2908 
2909 	if(ARCHIVE_OK != (ret = read_bits_16(ar, rar, p, &filter_type)))
2910 		return ret;
2911 
2912 	filter_type >>= 13;
2913 	skip_bits(rar, 3);
2914 
2915 	/* Perform some sanity checks on this filter parameters. Note that we
2916 	 * allow only DELTA, E8/E9 and ARM filters here, because rest of
2917 	 * filters are not used in RARv5. */
2918 
2919 	if(block_length < 4 ||
2920 	    block_length > 0x400000 ||
2921 	    filter_type > FILTER_ARM ||
2922 	    !is_valid_filter_block_start(rar, block_start))
2923 	{
2924 		archive_set_error(&ar->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2925 		    "Invalid filter encountered");
2926 		return ARCHIVE_FATAL;
2927 	}
2928 
2929 	/* Allocate a new filter. */
2930 	filt = add_new_filter(rar);
2931 	if(filt == NULL) {
2932 		archive_set_error(&ar->archive, ENOMEM,
2933 		    "Can't allocate memory for a filter descriptor.");
2934 		return ARCHIVE_FATAL;
2935 	}
2936 
2937 	filt->type = filter_type;
2938 	filt->block_start = rar->cstate.write_ptr + block_start;
2939 	filt->block_length = block_length;
2940 
2941 	rar->cstate.last_block_start = filt->block_start;
2942 	rar->cstate.last_block_length = filt->block_length;
2943 
2944 	/* Read some more data in case this is a DELTA filter. Other filter
2945 	 * types don't require any additional data over what was already
2946 	 * read. */
2947 	if(filter_type == FILTER_DELTA) {
2948 		int channels;
2949 
2950 		if(ARCHIVE_OK != (ret = read_consume_bits(ar, rar, p, 5, &channels)))
2951 			return ret;
2952 
2953 		filt->channels = channels + 1;
2954 	}
2955 
2956 	return ARCHIVE_OK;
2957 }
2958 
decode_code_length(struct archive_read * a,struct rar5 * rar,const uint8_t * p,uint16_t code)2959 static int decode_code_length(struct archive_read* a, struct rar5* rar,
2960 	const uint8_t* p, uint16_t code)
2961 {
2962 	int lbits, length = 2;
2963 
2964 	if(code < 8) {
2965 		lbits = 0;
2966 		length += code;
2967 	} else {
2968 		lbits = code / 4 - 1;
2969 		length += (4 | (code & 3)) << lbits;
2970 	}
2971 
2972 	if(lbits > 0) {
2973 		int add;
2974 
2975 		if(ARCHIVE_OK != read_consume_bits(a, rar, p, lbits, &add))
2976 			return -1;
2977 
2978 		length += add;
2979 	}
2980 
2981 	return length;
2982 }
2983 
copy_string(struct archive_read * a,int len,int dist)2984 static int copy_string(struct archive_read* a, int len, int dist) {
2985 	struct rar5* rar = get_context(a);
2986 	const uint64_t cmask = rar->cstate.window_mask;
2987 	const uint64_t write_ptr = rar->cstate.write_ptr +
2988 	    rar->cstate.solid_offset;
2989 	int i;
2990 
2991 	if (rar->cstate.window_buf == NULL)
2992 		return ARCHIVE_FATAL;
2993 
2994 	/* The unpacker spends most of the time in this function. It would be
2995 	 * a good idea to introduce some optimizations here.
2996 	 *
2997 	 * Just remember that this loop treats buffers that overlap differently
2998 	 * than buffers that do not overlap. This is why a simple memcpy(3)
2999 	 * call will not be enough. */
3000 
3001 	for(i = 0; i < len; i++) {
3002 		const ssize_t write_idx = (write_ptr + i) & cmask;
3003 		const ssize_t read_idx = (write_ptr + i - dist) & cmask;
3004 		rar->cstate.window_buf[write_idx] =
3005 		    rar->cstate.window_buf[read_idx];
3006 	}
3007 
3008 	rar->cstate.write_ptr += len;
3009 	return ARCHIVE_OK;
3010 }
3011 
/* Decodes symbols of the current compressed block and stores the produced
 * bytes in the window buffer.
 *
 * `p` points to the data of the block. Decoding stops when the end of the
 * block is reached (the `block_parsing_finished` flag is set in this case),
 * or when more than half of the window size has been produced since
 * `last_write_ptr`. In the latter case the next call to this function
 * continues from the place where this call has stopped.
 *
 * Returns ARCHIVE_OK when the decoding loop was left normally, ARCHIVE_EOF
 * when the next symbol can't be decoded, ARCHIVE_FATAL on other decoding
 * errors, or the status reported by a failing bit reader or by
 * parse_filter(). */
static int do_uncompress_block(struct archive_read* a, const uint8_t* p) {
	struct rar5* rar = get_context(a);
	uint16_t num;
	int ret;

	const uint64_t cmask = rar->cstate.window_mask;
	const struct compressed_block_header* hdr = &rar->last_block_hdr;
	/* Bit position within the last byte of the block at which the block
	 * data ends (see the end-of-block test below). */
	const uint8_t bit_size = 1 + bf_bit_size(hdr);

	while(1) {
		if(rar->cstate.write_ptr - rar->cstate.last_write_ptr >
		    (rar->cstate.window_size >> 1)) {
			/* Don't allow growing data by more than half of the
			 * window size at a time. In such case, break the loop;
			 * next call to this function will continue processing
			 * from this moment. */
			break;
		}

		/* The block ends either when the byte address has moved past
		 * the last byte of the block, or when it stands on the last
		 * byte and all of its valid bits have been used. */
		if(rar->bits.in_addr > rar->cstate.cur_block_size - 1 ||
		    (rar->bits.in_addr == rar->cstate.cur_block_size - 1 &&
		    rar->bits.bit_addr >= bit_size))
		{
			/* If the program counter is here, it means the
			 * function has finished processing the block. */
			rar->cstate.block_parsing_finished = 1;
			break;
		}

		/* Decode the next literal. */
		if(ARCHIVE_OK != decode_number(a, &rar->cstate.ld, p, &num)) {
			return ARCHIVE_EOF;
		}

		/* Num holds a decompression literal, or 'command code'.
		 *
		 * - Values lower than 256 are just bytes. Those codes
		 *   can be stored in the output buffer directly.
		 *
		 * - Code 256 defines a new filter, which is later used to
		 *   transform the data block accordingly to the filter type.
		 *   The data block needs to be fully uncompressed first.
		 *
		 * - Code 257 repeats the last copy operation: the same
		 *   length, and the most recently used distance.
		 *
		 * - Codes from 258 to 261 reuse one of the cached distances
		 *   together with a newly decoded length.
		 *
		 * - Codes equal to or bigger than 262 define a repetition
		 *   pattern (new length and new distance) that should be
		 *   copied from an already uncompressed chunk of data.
		 */

		if(num < 256) {
			/* Directly store the byte. */
			int64_t write_idx = rar->cstate.solid_offset +
			    rar->cstate.write_ptr++;

			rar->cstate.window_buf[write_idx & cmask] =
			    (uint8_t) num;
			continue;
		} else if(num >= 262) {
			/* The length is derived from the code itself, and the
			 * distance is decoded from the stream. */
			uint16_t dist_slot;
			int len = decode_code_length(a, rar, p, num - 262),
				dbits,
				dist = 1;

			if(len == -1) {
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_PROGRAMMER,
				    "Failed to decode the code length");

				return ARCHIVE_FATAL;
			}

			if(ARCHIVE_OK != decode_number(a, &rar->cstate.dd, p,
			    &dist_slot))
			{
				archive_set_error(&a->archive,
				    ARCHIVE_ERRNO_PROGRAMMER,
				    "Failed to decode the distance slot");

				return ARCHIVE_FATAL;
			}

			/* The distance slot selects the base distance and the
			 * number of extra distance bits (`dbits`). */
			if(dist_slot < 4) {
				dbits = 0;
				dist += dist_slot;
			} else {
				dbits = dist_slot / 2 - 1;

				/* Cast to uint32_t will make sure the shift
				 * left operation won't produce undefined
				 * result. Then, the uint32_t type will
				 * be implicitly casted to int. */
				dist += (uint32_t) (2 |
				    (dist_slot & 1)) << dbits;
			}

			if(dbits > 0) {
				if(dbits >= 4) {
					/* With 4 or more extra bits, the
					 * lowest 4 bits of the distance are
					 * decoded with a separate table
					 * (`ldd`); the remaining upper bits
					 * are read directly from the
					 * stream. */
					uint32_t add = 0;
					uint16_t low_dist;

					if(dbits > 4) {
						if(ARCHIVE_OK != (ret = read_bits_32(
						    a, rar, p, &add))) {
							/* Forward the status
							 * of the bit reader to
							 * the caller if we
							 * can't read more
							 * data. */
							return ret;
						}

						/* Keep the top `dbits - 4`
						 * bits of the 32-bit word and
						 * make room for the low 4
						 * bits. */
						skip_bits(rar, dbits - 4);
						add = (add >> (
						    36 - dbits)) << 4;
						dist += add;
					}

					if(ARCHIVE_OK != decode_number(a,
					    &rar->cstate.ldd, p, &low_dist))
					{
						archive_set_error(&a->archive,
						    ARCHIVE_ERRNO_PROGRAMMER,
						    "Failed to decode the "
						    "distance slot");

						return ARCHIVE_FATAL;
					}

					if(dist >= INT_MAX - low_dist - 1) {
						/* This only happens in
						 * invalid archives. */
						archive_set_error(&a->archive,
						    ARCHIVE_ERRNO_FILE_FORMAT,
						    "Distance pointer "
						    "overflow");
						return ARCHIVE_FATAL;
					}

					dist += low_dist;
				} else {
					/* dbits is one of [1,2,3] here; read
					 * all of the extra bits directly. */
					int add;

					if(ARCHIVE_OK != (ret = read_consume_bits(a, rar,
					     p, dbits, &add))) {
						/* Forward the status of the
						 * bit reader to the caller if
						 * we can't read more data. */
						return ret;
					}

					dist += add;
				}
			}

			/* The length is increased by one for every distance
			 * threshold that is exceeded. */
			if(dist > 0x100) {
				len++;

				if(dist > 0x2000) {
					len++;

					if(dist > 0x40000) {
						len++;
					}
				}
			}

			/* Remember this distance and length, so following
			 * symbols can refer to them. */
			dist_cache_push(rar, dist);
			rar->cstate.last_len = len;

			if(ARCHIVE_OK != copy_string(a, len, dist))
				return ARCHIVE_FATAL;

			continue;
		} else if(num == 256) {
			/* Create a filter. */
			ret = parse_filter(a, p);
			if(ret != ARCHIVE_OK)
				return ret;

			continue;
		} else if(num == 257) {
			/* Repeat the last copy operation, but only if there
			 * was one. */
			if(rar->cstate.last_len != 0) {
				if(ARCHIVE_OK != copy_string(a,
				    rar->cstate.last_len,
				    rar->cstate.dist_cache[0]))
				{
					return ARCHIVE_FATAL;
				}
			}

			continue;
		} else {
			/* num is one of [258,259,260,261]: use the cached
			 * distance selected by `idx`, and decode a new
			 * length. */
			const int idx = num - 258;
			const int dist = dist_cache_touch(rar, idx);

			uint16_t len_slot;
			int len;

			if(ARCHIVE_OK != decode_number(a, &rar->cstate.rd, p,
			    &len_slot)) {
				return ARCHIVE_FATAL;
			}

			len = decode_code_length(a, rar, p, len_slot);
			if (len == -1) {
				return ARCHIVE_FATAL;
			}

			rar->cstate.last_len = len;

			if(ARCHIVE_OK != copy_string(a, len, dist))
				return ARCHIVE_FATAL;

			continue;
		}
	}

	return ARCHIVE_OK;
}
3229 
3230 /* Binary search for the RARv5 signature. */
scan_for_signature(struct archive_read * a)3231 static int scan_for_signature(struct archive_read* a) {
3232 	const uint8_t* p;
3233 	const int chunk_size = 512;
3234 	ssize_t i;
3235 	char signature[sizeof(rar5_signature_xor)];
3236 
3237 	/* If we're here, it means we're on an 'unknown territory' data.
3238 	 * There's no indication what kind of data we're reading here.
3239 	 * It could be some text comment, any kind of binary data,
3240 	 * digital sign, dragons, etc.
3241 	 *
3242 	 * We want to find a valid RARv5 magic header inside this unknown
3243 	 * data. */
3244 
3245 	/* Is it possible in libarchive to just skip everything until the
3246 	 * end of the file? If so, it would be a better approach than the
3247 	 * current implementation of this function. */
3248 
3249 	rar5_signature(signature);
3250 
3251 	while(1) {
3252 		if(!read_ahead(a, chunk_size, &p))
3253 			return ARCHIVE_EOF;
3254 
3255 		for(i = 0; i < chunk_size - (int)sizeof(rar5_signature_xor);
3256 		    i++) {
3257 			if(memcmp(&p[i], signature,
3258 			    sizeof(rar5_signature_xor)) == 0) {
3259 				/* Consume the number of bytes we've used to
3260 				 * search for the signature, as well as the
3261 				 * number of bytes used by the signature
3262 				 * itself. After this we should be standing
3263 				 * on a valid base block header. */
3264 				(void) consume(a,
3265 				    i + sizeof(rar5_signature_xor));
3266 				return ARCHIVE_OK;
3267 			}
3268 		}
3269 
3270 		consume(a, chunk_size);
3271 	}
3272 
3273 	return ARCHIVE_FATAL;
3274 }
3275 
3276 /* This function will switch the multivolume archive file to another file,
3277  * i.e. from part03 to part 04. */
advance_multivolume(struct archive_read * a)3278 static int advance_multivolume(struct archive_read* a) {
3279 	int lret;
3280 	struct rar5* rar = get_context(a);
3281 
3282 	/* A small state machine that will skip unnecessary data, needed to
3283 	 * switch from one multivolume to another. Such skipping is needed if
3284 	 * we want to be an stream-oriented (instead of file-oriented)
3285 	 * unpacker.
3286 	 *
3287 	 * The state machine starts with `rar->main.endarc` == 0. It also
3288 	 * assumes that current stream pointer points to some base block
3289 	 * header.
3290 	 *
3291 	 * The `endarc` field is being set when the base block parsing
3292 	 * function encounters the 'end of archive' marker.
3293 	 */
3294 
3295 	while(1) {
3296 		if(rar->main.endarc == 1) {
3297 			int looping = 1;
3298 
3299 			rar->main.endarc = 0;
3300 
3301 			while(looping) {
3302 				lret = skip_base_block(a);
3303 				switch(lret) {
3304 					case ARCHIVE_RETRY:
3305 						/* Continue looping. */
3306 						break;
3307 					case ARCHIVE_OK:
3308 						/* Break loop. */
3309 						looping = 0;
3310 						break;
3311 					default:
3312 						/* Forward any errors to the
3313 						 * caller. */
3314 						return lret;
3315 				}
3316 			}
3317 
3318 			break;
3319 		} else {
3320 			/* Skip current base block. In order to properly skip
3321 			 * it, we really need to simply parse it and discard
3322 			 * the results. */
3323 
3324 			lret = skip_base_block(a);
3325 			if(lret == ARCHIVE_FATAL || lret == ARCHIVE_FAILED)
3326 				return lret;
3327 
3328 			/* The `skip_base_block` function tells us if we
3329 			 * should continue with skipping, or we should stop
3330 			 * skipping. We're trying to skip everything up to
3331 			 * a base FILE block. */
3332 
3333 			if(lret != ARCHIVE_RETRY) {
3334 				/* If there was an error during skipping, or we
3335 				 * have just skipped a FILE base block... */
3336 
3337 				if(rar->main.endarc == 0) {
3338 					return lret;
3339 				} else {
3340 					continue;
3341 				}
3342 			}
3343 		}
3344 	}
3345 
3346 	return ARCHIVE_OK;
3347 }
3348 
3349 /* Merges the partial block from the first multivolume archive file, and
3350  * partial block from the second multivolume archive file. The result is
3351  * a chunk of memory containing the whole block, and the stream pointer
3352  * is advanced to the next block in the second multivolume archive file. */
merge_block(struct archive_read * a,ssize_t block_size,const uint8_t ** p)3353 static int merge_block(struct archive_read* a, ssize_t block_size,
3354     const uint8_t** p)
3355 {
3356 	struct rar5* rar = get_context(a);
3357 	ssize_t cur_block_size, partial_offset = 0;
3358 	const uint8_t* lp;
3359 	int ret;
3360 
3361 	if(rar->merge_mode) {
3362 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3363 		    "Recursive merge is not allowed");
3364 
3365 		return ARCHIVE_FATAL;
3366 	}
3367 
3368 	/* Set a flag that we're in the switching mode. */
3369 	rar->cstate.switch_multivolume = 1;
3370 
3371 	/* Reallocate the memory which will hold the whole block. */
3372 	if(rar->vol.push_buf)
3373 		free((void*) rar->vol.push_buf);
3374 
3375 	/* Increasing the allocation block by 8 is due to bit reading functions,
3376 	 * which are using additional 2 or 4 bytes. Allocating the block size
3377 	 * by exact value would make bit reader perform reads from invalid
3378 	 * memory block when reading the last byte from the buffer. */
3379 	rar->vol.push_buf = malloc(block_size + 8);
3380 	if(!rar->vol.push_buf) {
3381 		archive_set_error(&a->archive, ENOMEM,
3382 		    "Can't allocate memory for a merge block buffer.");
3383 		return ARCHIVE_FATAL;
3384 	}
3385 
3386 	/* Valgrind complains if the extension block for bit reader is not
3387 	 * initialized, so initialize it. */
3388 	memset(&rar->vol.push_buf[block_size], 0, 8);
3389 
3390 	/* A single block can span across multiple multivolume archive files,
3391 	 * so we use a loop here. This loop will consume enough multivolume
3392 	 * archive files until the whole block is read. */
3393 
3394 	while(1) {
3395 		/* Get the size of current block chunk in this multivolume
3396 		 * archive file and read it. */
3397 		cur_block_size = rar5_min(rar->file.bytes_remaining,
3398 		    block_size - partial_offset);
3399 
3400 		if(cur_block_size == 0) {
3401 			archive_set_error(&a->archive,
3402 			    ARCHIVE_ERRNO_FILE_FORMAT,
3403 			    "Encountered block size == 0 during block merge");
3404 			return ARCHIVE_FATAL;
3405 		}
3406 
3407 		if(!read_ahead(a, cur_block_size, &lp))
3408 			return ARCHIVE_EOF;
3409 
3410 		/* Sanity check; there should never be a situation where this
3411 		 * function reads more data than the block's size. */
3412 		if(partial_offset + cur_block_size > block_size) {
3413 			archive_set_error(&a->archive,
3414 			    ARCHIVE_ERRNO_PROGRAMMER,
3415 			    "Consumed too much data when merging blocks.");
3416 			return ARCHIVE_FATAL;
3417 		}
3418 
3419 		/* Merge previous block chunk with current block chunk,
3420 		 * or create first block chunk if this is our first
3421 		 * iteration. */
3422 		memcpy(&rar->vol.push_buf[partial_offset], lp, cur_block_size);
3423 
3424 		/* Advance the stream read pointer by this block chunk size. */
3425 		if(ARCHIVE_OK != consume(a, cur_block_size))
3426 			return ARCHIVE_EOF;
3427 
3428 		/* Update the pointers. `partial_offset` contains information
3429 		 * about the sum of merged block chunks. */
3430 		partial_offset += cur_block_size;
3431 		rar->file.bytes_remaining -= cur_block_size;
3432 
3433 		/* If `partial_offset` is the same as `block_size`, this means
3434 		 * we've merged all block chunks and we have a valid full
3435 		 * block. */
3436 		if(partial_offset == block_size) {
3437 			break;
3438 		}
3439 
3440 		/* If we don't have any bytes to read, this means we should
3441 		 * switch to another multivolume archive file. */
3442 		if(rar->file.bytes_remaining == 0) {
3443 			rar->merge_mode++;
3444 			ret = advance_multivolume(a);
3445 			rar->merge_mode--;
3446 			if(ret != ARCHIVE_OK) {
3447 				return ret;
3448 			}
3449 		}
3450 	}
3451 
3452 	*p = rar->vol.push_buf;
3453 
3454 	/* If we're here, we can resume unpacking by processing the block
3455 	 * pointed to by the `*p` memory pointer. */
3456 
3457 	return ARCHIVE_OK;
3458 }
3459 
process_block(struct archive_read * a)3460 static int process_block(struct archive_read* a) {
3461 	const uint8_t* p;
3462 	struct rar5* rar = get_context(a);
3463 	int ret;
3464 
3465 	/* If we don't have any data to be processed, this most probably means
3466 	 * we need to switch to the next volume. */
3467 	if(rar->main.volume && rar->file.bytes_remaining == 0) {
3468 		ret = advance_multivolume(a);
3469 		if(ret != ARCHIVE_OK)
3470 			return ret;
3471 	}
3472 
3473 	if(rar->cstate.block_parsing_finished) {
3474 		ssize_t block_size;
3475 		ssize_t to_skip;
3476 		ssize_t cur_block_size;
3477 
3478 		/* The header size won't be bigger than 6 bytes. */
3479 		if(!read_ahead(a, 6, &p)) {
3480 			/* Failed to prefetch data block header. */
3481 			return ARCHIVE_EOF;
3482 		}
3483 
3484 		/*
3485 		 * Read block_size by parsing block header. Validate the header
3486 		 * by calculating CRC byte stored inside the header. Size of
3487 		 * the header is not constant (block size can be stored either
3488 		 * in 1 or 2 bytes), that's why block size is left out from the
3489 		 * `compressed_block_header` structure and returned by
3490 		 * `parse_block_header` as the second argument. */
3491 
3492 		ret = parse_block_header(a, p, &block_size,
3493 		    &rar->last_block_hdr);
3494 		if(ret != ARCHIVE_OK) {
3495 			return ret;
3496 		}
3497 
3498 		/* Skip block header. Next data is huffman tables,
3499 		 * if present. */
3500 		to_skip = sizeof(struct compressed_block_header) +
3501 			bf_byte_count(&rar->last_block_hdr) + 1;
3502 
3503 		if(ARCHIVE_OK != consume(a, to_skip))
3504 			return ARCHIVE_EOF;
3505 
3506 		rar->file.bytes_remaining -= to_skip;
3507 
3508 		/* The block size gives information about the whole block size,
3509 		 * but the block could be stored in split form when using
3510 		 * multi-volume archives. In this case, the block size will be
3511 		 * bigger than the actual data stored in this file. Remaining
3512 		 * part of the data will be in another file. */
3513 
3514 		cur_block_size =
3515 			rar5_min(rar->file.bytes_remaining, block_size);
3516 
3517 		if(block_size > rar->file.bytes_remaining) {
3518 			/* If current blocks' size is bigger than our data
3519 			 * size, this means we have a multivolume archive.
3520 			 * In this case, skip all base headers until the end
3521 			 * of the file, proceed to next "partXXX.rar" volume,
3522 			 * find its signature, skip all headers up to the first
3523 			 * FILE base header, and continue from there.
3524 			 *
3525 			 * Note that `merge_block` will update the `rar`
3526 			 * context structure quite extensively. */
3527 
3528 			ret = merge_block(a, block_size, &p);
3529 			if(ret != ARCHIVE_OK) {
3530 				return ret;
3531 			}
3532 
3533 			cur_block_size = block_size;
3534 
3535 			/* Current stream pointer should be now directly
3536 			 * *after* the block that spanned through multiple
3537 			 * archive files. `p` pointer should have the data of
3538 			 * the *whole* block (merged from partial blocks
3539 			 * stored in multiple archives files). */
3540 		} else {
3541 			rar->cstate.switch_multivolume = 0;
3542 
3543 			/* Read the whole block size into memory. This can take
3544 			 * up to  8 megabytes of memory in theoretical cases.
3545 			 * Might be worth to optimize this and use a standard
3546 			 * chunk of 4kb's. */
3547 			if(!read_ahead(a, 4 + cur_block_size, &p)) {
3548 				/* Failed to prefetch block data. */
3549 				return ARCHIVE_EOF;
3550 			}
3551 		}
3552 
3553 		rar->cstate.block_buf = p;
3554 		rar->cstate.cur_block_size = cur_block_size;
3555 		rar->cstate.block_parsing_finished = 0;
3556 
3557 		rar->bits.in_addr = 0;
3558 		rar->bits.bit_addr = 0;
3559 
3560 		if(bf_is_table_present(&rar->last_block_hdr)) {
3561 			/* Load Huffman tables. */
3562 			ret = parse_tables(a, rar, p);
3563 			if(ret != ARCHIVE_OK) {
3564 				/* Error during decompression of Huffman
3565 				 * tables. */
3566 				return ret;
3567 			}
3568 		}
3569 	} else {
3570 		/* Block parsing not finished, reuse previous memory buffer. */
3571 		p = rar->cstate.block_buf;
3572 	}
3573 
3574 	/* Uncompress the block, or a part of it, depending on how many bytes
3575 	 * will be generated by uncompressing the block.
3576 	 *
3577 	 * In case too many bytes will be generated, calling this function
3578 	 * again will resume the uncompression operation. */
3579 	ret = do_uncompress_block(a, p);
3580 	if(ret != ARCHIVE_OK) {
3581 		return ret;
3582 	}
3583 
3584 	if(rar->cstate.block_parsing_finished &&
3585 	    rar->cstate.switch_multivolume == 0 &&
3586 	    rar->cstate.cur_block_size > 0)
3587 	{
3588 		/* If we're processing a normal block, consume the whole
3589 		 * block. We can do this because we've already read the whole
3590 		 * block to memory. */
3591 		if(ARCHIVE_OK != consume(a, rar->cstate.cur_block_size))
3592 			return ARCHIVE_FATAL;
3593 
3594 		rar->file.bytes_remaining -= rar->cstate.cur_block_size;
3595 	} else if(rar->cstate.switch_multivolume) {
3596 		/* Don't consume the block if we're doing multivolume
3597 		 * processing. The volume switching function will consume
3598 		 * the proper count of bytes instead. */
3599 		rar->cstate.switch_multivolume = 0;
3600 	}
3601 
3602 	return ARCHIVE_OK;
3603 }
3604 
3605 /* Pops the `buf`, `size` and `offset` from the "data ready" stack.
3606  *
3607  * Returns ARCHIVE_OK when those arguments can be used, ARCHIVE_RETRY
3608  * when there is no data on the stack. */
use_data(struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3609 static int use_data(struct rar5* rar, const void** buf, size_t* size,
3610     int64_t* offset)
3611 {
3612 	int i;
3613 
3614 	for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3615 		struct data_ready *d = &rar->cstate.dready[i];
3616 
3617 		if(d->used) {
3618 			if(buf)    *buf = d->buf;
3619 			if(size)   *size = d->size;
3620 			if(offset) *offset = d->offset;
3621 
3622 			d->used = 0;
3623 			return ARCHIVE_OK;
3624 		}
3625 	}
3626 
3627 	return ARCHIVE_RETRY;
3628 }
3629 
3630 /* Pushes the `buf`, `size` and `offset` arguments to the rar->cstate.dready
3631  * FIFO stack. Those values will be popped from this stack by the `use_data`
3632  * function. */
push_data_ready(struct archive_read * a,struct rar5 * rar,const uint8_t * buf,size_t size,int64_t offset)3633 static int push_data_ready(struct archive_read* a, struct rar5* rar,
3634     const uint8_t* buf, size_t size, int64_t offset)
3635 {
3636 	int i;
3637 
3638 	/* Don't push if we're in skip mode. This is needed because solid
3639 	 * streams need full processing even if we're skipping data. After
3640 	 * fully processing the stream, we need to discard the generated bytes,
3641 	 * because we're interested only in the side effect: building up the
3642 	 * internal window circular buffer. This window buffer will be used
3643 	 * later during unpacking of requested data. */
3644 	if(rar->skip_mode)
3645 		return ARCHIVE_OK;
3646 
3647 	/* Sanity check. */
3648 	if(offset != rar->file.last_offset + rar->file.last_size) {
3649 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3650 		    "Sanity check error: output stream is not continuous");
3651 		return ARCHIVE_FATAL;
3652 	}
3653 
3654 	for(i = 0; i < rar5_countof(rar->cstate.dready); i++) {
3655 		struct data_ready* d = &rar->cstate.dready[i];
3656 		if(!d->used) {
3657 			d->used = 1;
3658 			d->buf = buf;
3659 			d->size = size;
3660 			d->offset = offset;
3661 
3662 			/* These fields are used only in sanity checking. */
3663 			rar->file.last_offset = offset;
3664 			rar->file.last_size = size;
3665 
3666 			/* Calculate the checksum of this new block before
3667 			 * submitting data to libarchive's engine. */
3668 			update_crc(rar, d->buf, d->size);
3669 
3670 			return ARCHIVE_OK;
3671 		}
3672 	}
3673 
3674 	/* Program counter will reach this code if the `rar->cstate.data_ready`
3675 	 * stack will be filled up so that no new entries will be allowed. The
3676 	 * code shouldn't allow such situation to occur. So we treat this case
3677 	 * as an internal error. */
3678 
3679 	archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
3680 	    "Error: premature end of data_ready stack");
3681 	return ARCHIVE_FATAL;
3682 }
3683 
3684 /* This function uncompresses the data that is stored in the <FILE> base
3685  * block.
3686  *
3687  * The FILE base block looks like this:
3688  *
3689  * <header><huffman tables><block_1><block_2>...<block_n>
3690  *
3691  * The <header> is a block header, that is parsed in parse_block_header().
3692  * It's a "compressed_block_header" structure, containing metadata needed
3693  * to know when we should stop looking for more <block_n> blocks.
3694  *
3695  * <huffman tables> contain data needed to set up the huffman tables, needed
3696  * for the actual decompression.
3697  *
3698  * Each <block_n> consists of series of literals:
3699  *
3700  * <literal><literal><literal>...<literal>
3701  *
3702  * Those literals generate the uncompression data. They operate on a circular
3703  * buffer, sometimes writing raw data into it, sometimes referencing
3704  * some previous data inside this buffer, and sometimes declaring a filter
3705  * that will need to be executed on the data stored in the circular buffer.
3706  * It all depends on the literal that is used.
3707  *
3708  * Sometimes blocks produce output data, sometimes they don't. For example, for
3709  * some huge files that use lots of filters, sometimes a block is filled with
3710  * only filter declaration literals. Such blocks won't produce any data in the
3711  * circular buffer.
3712  *
3713  * Sometimes blocks will produce 4 bytes of data, and sometimes 1 megabyte,
3714  * because a literal can reference previously decompressed data. For example,
3715  * there can be a literal that says: 'append a byte 0xFE here', and after
3716  * it another literal can say 'append 1 megabyte of data from circular buffer
3717  * offset 0x12345'. This is how RAR format handles compressing repeated
3718  * patterns.
3719  *
3720  * The RAR compressor creates those literals and the actual efficiency of
3721  * compression depends on what those literals are. The literals can also
3722  * be seen as a kind of a non-turing-complete virtual machine that simply
3723  * tells the decompressor what it should do.
3724  * */
3725 
do_uncompress_file(struct archive_read * a)3726 static int do_uncompress_file(struct archive_read* a) {
3727 	struct rar5* rar = get_context(a);
3728 	int ret;
3729 	int64_t max_end_pos;
3730 
3731 	if(!rar->cstate.initialized) {
3732 		/* Don't perform full context reinitialization if we're
3733 		 * processing a solid archive. */
3734 		if(!rar->main.solid || !rar->cstate.window_buf) {
3735 			init_unpack(rar);
3736 		}
3737 
3738 		rar->cstate.initialized = 1;
3739 	}
3740 
3741 	/* Don't allow extraction if window_size is invalid. */
3742 	if(rar->cstate.window_size == 0) {
3743 		archive_set_error(&a->archive,
3744 			ARCHIVE_ERRNO_FILE_FORMAT,
3745 			"Invalid window size declaration in this file");
3746 
3747 		/* This should never happen in valid files. */
3748 		return ARCHIVE_FATAL;
3749 	}
3750 
3751 	if(rar->cstate.all_filters_applied == 1) {
3752 		/* We use while(1) here, but standard case allows for just 1
3753 		 * iteration. The loop will iterate if process_block() didn't
3754 		 * generate any data at all. This can happen if the block
3755 		 * contains only filter definitions (this is common in big
3756 		 * files). */
3757 		while(1) {
3758 			ret = process_block(a);
3759 			if(ret == ARCHIVE_EOF || ret == ARCHIVE_FATAL)
3760 				return ret;
3761 
3762 			if(rar->cstate.last_write_ptr ==
3763 			    rar->cstate.write_ptr) {
3764 				/* The block didn't generate any new data,
3765 				 * so just process a new block if this one
3766 				 * wasn't the last block in the file. */
3767 				if (bf_is_last_block(&rar->last_block_hdr)) {
3768 					return ARCHIVE_EOF;
3769 				}
3770 
3771 				continue;
3772 			}
3773 
3774 			/* The block has generated some new data, so break
3775 			 * the loop. */
3776 			break;
3777 		}
3778 	}
3779 
3780 	/* Try to run filters. If filters won't be applied, it means that
3781 	 * insufficient data was generated. */
3782 	ret = apply_filters(a);
3783 	if(ret == ARCHIVE_RETRY) {
3784 		return ARCHIVE_OK;
3785 	} else if(ret == ARCHIVE_FATAL) {
3786 		return ARCHIVE_FATAL;
3787 	}
3788 
3789 	/* If apply_filters() will return ARCHIVE_OK, we can continue here. */
3790 
3791 	if(cdeque_size(&rar->cstate.filters) > 0) {
3792 		/* Check if we can write something before hitting first
3793 		 * filter. */
3794 		struct filter_info* flt;
3795 
3796 		/* Get the block_start offset from the first filter. */
3797 		if(CDE_OK != cdeque_front(&rar->cstate.filters,
3798 		    cdeque_filter_p(&flt)))
3799 		{
3800 			archive_set_error(&a->archive,
3801 			    ARCHIVE_ERRNO_PROGRAMMER,
3802 			    "Can't read first filter");
3803 			return ARCHIVE_FATAL;
3804 		}
3805 
3806 		max_end_pos = rar5_min(flt->block_start,
3807 		    rar->cstate.write_ptr);
3808 	} else {
3809 		/* There are no filters defined, or all filters were applied.
3810 		 * This means we can just store the data without any
3811 		 * postprocessing. */
3812 		max_end_pos = rar->cstate.write_ptr;
3813 	}
3814 
3815 	if(max_end_pos == rar->cstate.last_write_ptr) {
3816 		/* We can't write anything yet. The block uncompression
3817 		 * function did not generate enough data, and no filter can be
3818 		 * applied. At the same time we don't have any data that can be
3819 		 *  stored without filter postprocessing. This means we need to
3820 		 *  wait for more data to be generated, so we can apply the
3821 		 * filters.
3822 		 *
3823 		 * Signal the caller that we need more data to be able to do
3824 		 * anything.
3825 		 */
3826 		return ARCHIVE_RETRY;
3827 	} else {
3828 		/* We can write the data before hitting the first filter.
3829 		 * So let's do it. The push_window_data() function will
3830 		 * effectively return the selected data block to the user
3831 		 * application. */
3832 		push_window_data(a, rar, rar->cstate.last_write_ptr,
3833 		    max_end_pos);
3834 		rar->cstate.last_write_ptr = max_end_pos;
3835 	}
3836 
3837 	return ARCHIVE_OK;
3838 }
3839 
uncompress_file(struct archive_read * a)3840 static int uncompress_file(struct archive_read* a) {
3841 	int ret;
3842 
3843 	while(1) {
3844 		/* Sometimes the uncompression function will return a
3845 		 * 'retry' signal. If this will happen, we have to retry
3846 		 * the function. */
3847 		ret = do_uncompress_file(a);
3848 		if(ret != ARCHIVE_RETRY)
3849 			return ret;
3850 	}
3851 }
3852 
3853 
do_unstore_file(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3854 static int do_unstore_file(struct archive_read* a,
3855     struct rar5* rar, const void** buf, size_t* size, int64_t* offset)
3856 {
3857 	size_t to_read;
3858 	const uint8_t* p;
3859 
3860 	if(rar->file.bytes_remaining == 0 && rar->main.volume > 0 &&
3861 	    rar->generic.split_after > 0)
3862 	{
3863 		int ret;
3864 
3865 		rar->cstate.switch_multivolume = 1;
3866 		ret = advance_multivolume(a);
3867 		rar->cstate.switch_multivolume = 0;
3868 
3869 		if(ret != ARCHIVE_OK) {
3870 			/* Failed to advance to next multivolume archive
3871 			 * file. */
3872 			return ret;
3873 		}
3874 	}
3875 
3876 	to_read = rar5_min(rar->file.bytes_remaining, 64 * 1024);
3877 	if(to_read == 0) {
3878 		return ARCHIVE_EOF;
3879 	}
3880 
3881 	if(!read_ahead(a, to_read, &p)) {
3882 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
3883 		    "I/O error when unstoring file");
3884 		return ARCHIVE_FATAL;
3885 	}
3886 
3887 	if(ARCHIVE_OK != consume(a, to_read)) {
3888 		return ARCHIVE_EOF;
3889 	}
3890 
3891 	if(buf)    *buf = p;
3892 	if(size)   *size = to_read;
3893 	if(offset) *offset = rar->cstate.last_unstore_ptr;
3894 
3895 	rar->file.bytes_remaining -= to_read;
3896 	rar->cstate.last_unstore_ptr += to_read;
3897 
3898 	update_crc(rar, p, to_read);
3899 	return ARCHIVE_OK;
3900 }
3901 
do_unpack(struct archive_read * a,struct rar5 * rar,const void ** buf,size_t * size,int64_t * offset)3902 static int do_unpack(struct archive_read* a, struct rar5* rar,
3903     const void** buf, size_t* size, int64_t* offset)
3904 {
3905 	enum COMPRESSION_METHOD {
3906 		STORE = 0, FASTEST = 1, FAST = 2, NORMAL = 3, GOOD = 4,
3907 		BEST = 5
3908 	};
3909 
3910 	if(rar->file.service > 0) {
3911 		return do_unstore_file(a, rar, buf, size, offset);
3912 	} else {
3913 		switch(rar->cstate.method) {
3914 			case STORE:
3915 				return do_unstore_file(a, rar, buf, size,
3916 				    offset);
3917 			case FASTEST:
3918 				/* fallthrough */
3919 			case FAST:
3920 				/* fallthrough */
3921 			case NORMAL:
3922 				/* fallthrough */
3923 			case GOOD:
3924 				/* fallthrough */
3925 			case BEST:
3926 				/* No data is returned here. But because a sparse-file aware
3927 				 * caller (like archive_read_data_into_fd) may treat zero-size
3928 				 * as a sparse file block, we need to update the offset
3929 				 * accordingly. At this point the decoder doesn't have any
3930 				 * pending uncompressed data blocks, so the current position in
3931 				 * the output file should be last_write_ptr. */
3932 				if (offset) *offset = rar->cstate.last_write_ptr;
3933 				return uncompress_file(a);
3934 			default:
3935 				archive_set_error(&a->archive,
3936 				    ARCHIVE_ERRNO_FILE_FORMAT,
3937 				    "Compression method not supported: 0x%x",
3938 				    rar->cstate.method);
3939 
3940 				return ARCHIVE_FATAL;
3941 		}
3942 	}
3943 
3944 #if !defined WIN32
3945 	/* Not reached. */
3946 	return ARCHIVE_OK;
3947 #endif
3948 }
3949 
verify_checksums(struct archive_read * a)3950 static int verify_checksums(struct archive_read* a) {
3951 	int verify_crc;
3952 	struct rar5* rar = get_context(a);
3953 
3954 	/* Check checksums only when actually unpacking the data. There's no
3955 	 * need to calculate checksum when we're skipping data in solid archives
3956 	 * (skipping in solid archives is the same thing as unpacking compressed
3957 	 * data and discarding the result). */
3958 
3959 	if(!rar->skip_mode) {
3960 		/* Always check checksums if we're not in skip mode */
3961 		verify_crc = 1;
3962 	} else {
3963 		/* We can override the logic above with a compile-time option
3964 		 * NO_CRC_ON_SOLID_SKIP. This option is used during debugging,
3965 		 * and it will check checksums of unpacked data even when
3966 		 * we're skipping it. */
3967 
3968 #if defined CHECK_CRC_ON_SOLID_SKIP
3969 		/* Debug case */
3970 		verify_crc = 1;
3971 #else
3972 		/* Normal case */
3973 		verify_crc = 0;
3974 #endif
3975 	}
3976 
3977 	if(verify_crc) {
3978 		/* During unpacking, on each unpacked block we're calling the
3979 		 * update_crc() function. Since we are here, the unpacking
3980 		 * process is already over and we can check if calculated
3981 		 * checksum (CRC32 or BLAKE2sp) is the same as what is stored
3982 		 * in the archive. */
3983 		if(rar->file.stored_crc32 > 0) {
3984 			/* Check CRC32 only when the file contains a CRC32
3985 			 * value for this file. */
3986 
3987 			if(rar->file.calculated_crc32 !=
3988 			    rar->file.stored_crc32) {
3989 				/* Checksums do not match; the unpacked file
3990 				 * is corrupted. */
3991 
3992 				DEBUG_CODE {
3993 					printf("Checksum error: CRC32 "
3994 					    "(was: %08" PRIx32 ", expected: %08" PRIx32 ")\n",
3995 					    rar->file.calculated_crc32,
3996 					    rar->file.stored_crc32);
3997 				}
3998 
3999 #ifndef DONT_FAIL_ON_CRC_ERROR
4000 				archive_set_error(&a->archive,
4001 				    ARCHIVE_ERRNO_FILE_FORMAT,
4002 				    "Checksum error: CRC32");
4003 				return ARCHIVE_FATAL;
4004 #endif
4005 			} else {
4006 				DEBUG_CODE {
4007 					printf("Checksum OK: CRC32 "
4008 					    "(%08" PRIx32 "/%08" PRIx32 ")\n",
4009 					    rar->file.stored_crc32,
4010 					    rar->file.calculated_crc32);
4011 				}
4012 			}
4013 		}
4014 
4015 		if(rar->file.has_blake2 > 0) {
4016 			/* BLAKE2sp is an optional checksum algorithm that is
4017 			 * added to RARv5 archives when using the `-htb` switch
4018 			 *  during creation of archive.
4019 			 *
4020 			 * We now finalize the hash calculation by calling the
4021 			 * `final` function. This will generate the final hash
4022 			 * value we can use to compare it with the BLAKE2sp
4023 			 * checksum that is stored in the archive.
4024 			 *
4025 			 * The return value of this `final` function is not
4026 			 * very helpful, as it guards only against improper use.
4027  			 * This is why we're explicitly ignoring it. */
4028 
4029 			uint8_t b2_buf[32];
4030 			(void) blake2sp_final(&rar->file.b2state, b2_buf, 32);
4031 
4032 			if(memcmp(&rar->file.blake2sp, b2_buf, 32) != 0) {
4033 #ifndef DONT_FAIL_ON_CRC_ERROR
4034 				archive_set_error(&a->archive,
4035 				    ARCHIVE_ERRNO_FILE_FORMAT,
4036 				    "Checksum error: BLAKE2");
4037 
4038 				return ARCHIVE_FATAL;
4039 #endif
4040 			}
4041 		}
4042 	}
4043 
4044 	/* Finalization for this file has been successfully completed. */
4045 	return ARCHIVE_OK;
4046 }
4047 
/*
 * Final verification hook run once an entry has been fully processed.
 * It currently consists solely of the per-file checksum verification and
 * returns that function's result unchanged.
 */
static int verify_global_checksums(struct archive_read* a) {
	const int status = verify_checksums(a);

	return status;
}
4051 
4052 /*
4053  * Decryption function for the magic signature pattern. Check the comment near
4054  * the `rar5_signature_xor` symbol to read the rationale behind this.
4055  */
rar5_signature(char * buf)4056 static void rar5_signature(char *buf) {
4057 		size_t i;
4058 
4059 		for(i = 0; i < sizeof(rar5_signature_xor); i++) {
4060 			buf[i] = rar5_signature_xor[i] ^ 0xA1;
4061 		}
4062 }
4063 
rar5_read_data(struct archive_read * a,const void ** buff,size_t * size,int64_t * offset)4064 static int rar5_read_data(struct archive_read *a, const void **buff,
4065     size_t *size, int64_t *offset) {
4066 	int ret;
4067 	struct rar5* rar = get_context(a);
4068 
4069 	if (size)
4070 		*size = 0;
4071 
4072 	if(rar->file.dir > 0) {
4073 		/* Don't process any data if this file entry was declared
4074 		 * as a directory. This is needed, because entries marked as
4075 		 * directory doesn't have any dictionary buffer allocated, so
4076 		 * it's impossible to perform any decompression. */
4077 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
4078 		    "Can't decompress an entry marked as a directory");
4079 		return ARCHIVE_FAILED;
4080 	}
4081 
4082 	if(!rar->skip_mode && (rar->cstate.last_write_ptr > rar->file.unpacked_size)) {
4083 		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
4084 		    "Unpacker has written too many bytes");
4085 		return ARCHIVE_FATAL;
4086 	}
4087 
4088 	ret = use_data(rar, buff, size, offset);
4089 	if(ret == ARCHIVE_OK) {
4090 		return ret;
4091 	}
4092 
4093 	if(rar->file.eof == 1) {
4094 		return ARCHIVE_EOF;
4095 	}
4096 
4097 	ret = do_unpack(a, rar, buff, size, offset);
4098 	if(ret != ARCHIVE_OK) {
4099 		return ret;
4100 	}
4101 
4102 	if(rar->file.bytes_remaining == 0 &&
4103 			rar->cstate.last_write_ptr == rar->file.unpacked_size)
4104 	{
4105 		/* If all bytes of current file were processed, run
4106 		 * finalization.
4107 		 *
4108 		 * Finalization will check checksum against proper values. If
4109 		 * some of the checksums will not match, we'll return an error
4110 		 * value in the last `archive_read_data` call to signal an error
4111 		 * to the user. */
4112 
4113 		rar->file.eof = 1;
4114 		return verify_global_checksums(a);
4115 	}
4116 
4117 	return ARCHIVE_OK;
4118 }
4119 
rar5_read_data_skip(struct archive_read * a)4120 static int rar5_read_data_skip(struct archive_read *a) {
4121 	struct rar5* rar = get_context(a);
4122 
4123 	if(rar->main.solid) {
4124 		/* In solid archives, instead of skipping the data, we need to
4125 		 * extract it, and dispose the result. The side effect of this
4126 		 * operation will be setting up the initial window buffer state
4127 		 * needed to be able to extract the selected file. */
4128 
4129 		int ret;
4130 
4131 		/* Make sure to process all blocks in the compressed stream. */
4132 		while(rar->file.bytes_remaining > 0) {
4133 			/* Setting the "skip mode" will allow us to skip
4134 			 * checksum checks during data skipping. Checking the
4135 			 * checksum of skipped data isn't really necessary and
4136 			 * it's only slowing things down.
4137 			 *
4138 			 * This is incremented instead of setting to 1 because
4139 			 * this data skipping function can be called
4140 			 * recursively. */
4141 			rar->skip_mode++;
4142 
4143 			/* We're disposing 1 block of data, so we use triple
4144 			 * NULLs in arguments. */
4145 			ret = rar5_read_data(a, NULL, NULL, NULL);
4146 
4147 			/* Turn off "skip mode". */
4148 			rar->skip_mode--;
4149 
4150 			if(ret < 0 || ret == ARCHIVE_EOF) {
4151 				/* Propagate any potential error conditions
4152 				 * to the caller. */
4153 				return ret;
4154 			}
4155 		}
4156 	} else {
4157 		/* In standard archives, we can just jump over the compressed
4158 		 * stream. Each file in non-solid archives starts from an empty
4159 		 * window buffer. */
4160 
4161 		if(ARCHIVE_OK != consume(a, rar->file.bytes_remaining)) {
4162 			return ARCHIVE_FATAL;
4163 		}
4164 
4165 		rar->file.bytes_remaining = 0;
4166 	}
4167 
4168 	return ARCHIVE_OK;
4169 }
4170 
rar5_seek_data(struct archive_read * a,int64_t offset,int whence)4171 static int64_t rar5_seek_data(struct archive_read *a, int64_t offset,
4172     int whence)
4173 {
4174 	(void) a;
4175 	(void) offset;
4176 	(void) whence;
4177 
4178 	/* We're a streaming unpacker, and we don't support seeking. */
4179 
4180 	return ARCHIVE_FATAL;
4181 }
4182 
rar5_cleanup(struct archive_read * a)4183 static int rar5_cleanup(struct archive_read *a) {
4184 	struct rar5* rar = get_context(a);
4185 
4186 	free(rar->cstate.window_buf);
4187 	free(rar->cstate.filtered_buf);
4188 
4189 	free(rar->vol.push_buf);
4190 
4191 	free_filters(rar);
4192 	cdeque_free(&rar->cstate.filters);
4193 
4194 	free(rar);
4195 	a->format->data = NULL;
4196 
4197 	return ARCHIVE_OK;
4198 }
4199 
/*
 * Capabilities callback of the format. No capability flags are
 * reported; the reader argument is not used.
 */
static int rar5_capabilities(struct archive_read * a) {
	const int no_capabilities = 0;

	(void) a;
	return no_capabilities;
}
4204 
rar5_has_encrypted_entries(struct archive_read * _a)4205 static int rar5_has_encrypted_entries(struct archive_read *_a) {
4206 	(void) _a;
4207 
4208 	/* Unsupported for now. */
4209 	return ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED;
4210 }
4211 
rar5_init(struct rar5 * rar)4212 static int rar5_init(struct rar5* rar) {
4213 	memset(rar, 0, sizeof(struct rar5));
4214 
4215 	if(CDE_OK != cdeque_init(&rar->cstate.filters, 8192))
4216 		return ARCHIVE_FATAL;
4217 
4218 	return ARCHIVE_OK;
4219 }
4220 
archive_read_support_format_rar5(struct archive * _a)4221 int archive_read_support_format_rar5(struct archive *_a) {
4222 	struct archive_read* ar;
4223 	int ret;
4224 	struct rar5* rar;
4225 
4226 	if(ARCHIVE_OK != (ret = get_archive_read(_a, &ar)))
4227 		return ret;
4228 
4229 	rar = malloc(sizeof(*rar));
4230 	if(rar == NULL) {
4231 		archive_set_error(&ar->archive, ENOMEM,
4232 		    "Can't allocate rar5 data");
4233 		return ARCHIVE_FATAL;
4234 	}
4235 
4236 	if(ARCHIVE_OK != rar5_init(rar)) {
4237 		archive_set_error(&ar->archive, ENOMEM,
4238 		    "Can't allocate rar5 filter buffer");
4239 		free(rar);
4240 		return ARCHIVE_FATAL;
4241 	}
4242 
4243 	ret = __archive_read_register_format(ar,
4244 	    rar,
4245 	    "rar5",
4246 	    rar5_bid,
4247 	    rar5_options,
4248 	    rar5_read_header,
4249 	    rar5_read_data,
4250 	    rar5_read_data_skip,
4251 	    rar5_seek_data,
4252 	    rar5_cleanup,
4253 	    rar5_capabilities,
4254 	    rar5_has_encrypted_entries);
4255 
4256 	if(ret != ARCHIVE_OK) {
4257 		(void) rar5_cleanup(ar);
4258 	}
4259 
4260 	return ret;
4261 }
4262