1 /*
2 * Extract component parts of ARJ archives.
3 *
4 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 * Copyright (C) 2007-2013 Sourcefire, Inc.
6 *
7 * Authors: Trog
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 * MA 02110-1301, USA.
22 */
23
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <ctype.h>
38
39 #include "clamav.h"
40 #include "str.h"
41 #include "others.h"
42 #include "unarj.h"
43 #include "textnorm.h"
44
/*
 * Header size limits.  HEADERSIZE_MAX is the largest header_size value the
 * header readers in this file accept before rejecting the archive.
 */
#define FIRST_HDR_SIZE 30
#define COMMENT_MAX 2048
#define FNAME_MAX 512
#define HEADERSIZE_MAX (FIRST_HDR_SIZE + 10 + FNAME_MAX + COMMENT_MAX)
#define MAXDICBIT 16
/* Size in bytes of the output window (the `text` buffer) used by both decoders. */
#define DDICSIZ 26624
/* Shortest match length: decode() maps symbol (UCHAR_MAX + 1) to a copy of THRESHOLD bytes. */
#define THRESHOLD 3
#ifndef UCHAR_MAX
#define UCHAR_MAX (255)
#endif
#ifndef CHAR_BIT
#define CHAR_BIT (8)
#endif
#define MAXMATCH 256
#ifndef FALSE
#define FALSE (0)
#define TRUE (1)
#endif

/* Width in bits of the look-ahead buffers (bit_buf / getbuf). */
#define CODE_BIT 16
/* Number of symbols in the code-length alphabet consumed by read_c_len(). */
#define NT (CODE_BIT + 3)
/* Field widths (in bits) passed to read_pt_len() as `nbit`. */
#define PBIT 5
#define TBIT 5
/* Number of literal/length symbols; also sizes c_len[] and the left/right arrays. */
#define NC (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)
#define NP (MAXDICBIT + 1)
/* Width in bits of the symbol count read at the start of read_c_len(). */
#define CBIT 9
/* Entry counts of c_table[] (indexed by 12 bits) and pt_table[] (indexed by 8 bits). */
#define CTABLESIZE 4096
#define PTABLESIZE 256
/* First and last-plus-one bit widths tried by decode_ptr(). */
#define STRTP 9
#define STOPP 13

/* First and last-plus-one bit widths tried by decode_len(). */
#define STRTL 0
#define STOPL 7

/* NPT is the larger of NT and NP; it sizes pt_len[]. */
#if NT > NP
#define NPT NT
#else
#define NPT NP
#endif

/* Bit in the file header `flags` byte that marks an entry as encrypted. */
#define GARBLE_FLAG 0x01

/* Compile the packed attribute away on toolchains that do not support it. */
#ifndef HAVE_ATTRIB_PACKED
#define __attribute__(x)
#endif
90
/* Switch to byte packing so the header structs below match the on-disk layout. */
#ifdef HAVE_PRAGMA_PACK
#pragma pack(1)
#endif

#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack 1
#endif

/*
 * Fixed leading part of the ARJ main (archive) header.
 * arj_read_main_header() reads exactly 30 bytes from the map straight into
 * this struct, so field order and packing must not change.
 */
typedef struct arj_main_hdr_tag {
    uint8_t first_hdr_size; /* must be 30 bytes */
    uint8_t version;
    uint8_t min_version;
    uint8_t host_os;
    uint8_t flags;
    uint8_t security_version;
    uint8_t file_type;
    uint8_t pad;
    uint32_t time_created __attribute__((packed));
    uint32_t time_modified __attribute__((packed));
    uint32_t archive_size __attribute__((packed));
    uint32_t sec_env_file_position __attribute__((packed));
    uint16_t entryname_pos __attribute__((packed));
    uint16_t sec_trail_size __attribute__((packed));
    uint16_t host_data __attribute__((packed));
} arj_main_hdr_t;
116
/*
 * Fixed leading part of a per-file ARJ header.
 * arj_read_file_header() reads exactly 30 bytes from the map straight into
 * this struct and then converts comp_size / orig_size from little-endian to
 * host order, so field order and packing must not change.
 */
typedef struct arj_file_hdr_tag {
    uint8_t first_hdr_size; /* must be 30 bytes */
    uint8_t version;
    uint8_t min_version;
    uint8_t host_os;
    uint8_t flags;          /* GARBLE_FLAG set => entry is treated as encrypted */
    uint8_t method;         /* selects the decoder in cli_unarj_extract_file() */
    uint8_t file_type;
    uint8_t password_mod;
    uint32_t time_modified __attribute__((packed));
    uint32_t comp_size __attribute__((packed)); /* number of compressed bytes that follow the header */
    uint32_t orig_size __attribute__((packed)); /* number of bytes the decoders try to produce */
    uint32_t orig_crc __attribute__((packed));
    uint16_t entryname_pos __attribute__((packed));
    uint16_t file_mode __attribute__((packed));
    uint16_t host_data __attribute__((packed));
} arj_file_hdr_t;

/* Restore the default packing. */
#ifdef HAVE_PRAGMA_PACK
#pragma pack()
#endif

#ifdef HAVE_PRAGMA_PACK_HPPA
#pragma pack
#endif
142
/*
 * Working state shared by the decoders in this file.
 * decode() and decode_f() zero the whole struct with memset() before use.
 */
typedef struct arj_decode_tag {
    unsigned char *text;          /* DDICSIZ-byte output window, heap allocated by the decoders */
    fmap_t *map;                  /* input map the compressed bytes are fetched from */
    size_t offset;                /* map offset of the next compressed byte; advanced by fill_buf() */
    const uint8_t *buf;           /* read cursor inside the chunk last obtained from the map */
    const void *bufend;           /* one past the end of that chunk; buf == bufend triggers a refetch */
    uint16_t blocksize;           /* symbols left in the current block; 0 makes decode_c() read new tables */
    uint16_t bit_buf;             /* 16-bit look-ahead of the input bit stream */
    int bit_count;                /* bits of sub_bit_buf not yet moved into bit_buf */
    uint32_t comp_size;           /* compressed bytes still allowed to be consumed; zeros are fed after that */
    int16_t getlen, getbuf;       /* bit count / bit buffer used by the ARJ_* macros (decode_f path) */
    uint16_t left[2 * NC - 1];    /* child links followed on a 0 bit for codes longer than the table index */
    uint16_t right[2 * NC - 1];   /* child links followed on a 1 bit for codes longer than the table index */
    unsigned char c_len[NC];      /* code length of each literal/length symbol */
    uint16_t c_table[CTABLESIZE]; /* lookup table indexed by the top 12 bits of bit_buf */
    unsigned char pt_len[NPT];    /* code lengths for the table built by read_pt_len() */
    unsigned char sub_bit_buf;    /* most recently fetched input byte */
    uint16_t pt_table[PTABLESIZE]; /* lookup table indexed by the top 8 bits of bit_buf */
    int status;                   /* CL_SUCCESS, or the error recorded by a failing helper */
} arj_decode_t;
163
fill_buf(arj_decode_t * decode_data,int n)164 static cl_error_t fill_buf(arj_decode_t *decode_data, int n)
165 {
166 if (decode_data->status == CL_EFORMAT)
167 return CL_EFORMAT;
168 if (((uint64_t)decode_data->bit_buf) * (n > 0 ? 2 << (n - 1) : 0) > UINT32_MAX)
169 return CL_EFORMAT;
170 decode_data->bit_buf = (((uint64_t)decode_data->bit_buf) << n) & 0xFFFF;
171 while (n > decode_data->bit_count) {
172 decode_data->bit_buf |= decode_data->sub_bit_buf << (n -= decode_data->bit_count);
173 if (decode_data->comp_size != 0) {
174 decode_data->comp_size--;
175 if (decode_data->buf == decode_data->bufend) {
176 size_t len;
177 decode_data->buf = fmap_need_off_once_len(decode_data->map, decode_data->offset, 8192, &len);
178 if (!decode_data->buf || !len) {
179 /* the file is most likely corrupted, so
180 * we return CL_EFORMAT instead of CL_EREAD
181 */
182 decode_data->status = CL_EFORMAT;
183 return CL_EFORMAT;
184 }
185 decode_data->bufend = decode_data->buf + len;
186 }
187 decode_data->sub_bit_buf = *decode_data->buf++;
188 decode_data->offset++;
189 } else {
190 decode_data->sub_bit_buf = 0;
191 }
192 decode_data->bit_count = CHAR_BIT;
193 }
194 decode_data->bit_buf |= decode_data->sub_bit_buf >> (decode_data->bit_count -= n);
195 return CL_SUCCESS;
196 }
197
init_getbits(arj_decode_t * decode_data)198 static cl_error_t init_getbits(arj_decode_t *decode_data)
199 {
200 decode_data->bit_buf = 0;
201 decode_data->sub_bit_buf = 0;
202 decode_data->bit_count = 0;
203 return fill_buf(decode_data, 2 * CHAR_BIT);
204 }
205
/*
 * Return the next n bits of the stream (taken from the top of the look-ahead
 * buffer) and consume them.  The result of fill_buf() is not inspected here.
 */
static unsigned short arj_getbits(arj_decode_t *decode_data, int n)
{
    const unsigned short bits = decode_data->bit_buf >> (2 * CHAR_BIT - n);

    fill_buf(decode_data, n);
    return bits;
}
214
decode_start(arj_decode_t * decode_data)215 static cl_error_t decode_start(arj_decode_t *decode_data)
216 {
217 decode_data->blocksize = 0;
218 return init_getbits(decode_data);
219 }
220
write_text(int ofd,unsigned char * data,size_t length)221 static cl_error_t write_text(int ofd, unsigned char *data, size_t length)
222 {
223 size_t count;
224
225 count = cli_writen(ofd, data, length);
226 if (count != length) {
227 return CL_EWRITE;
228 }
229 return CL_SUCCESS;
230 }
231
/*
 * Build a decoding table from a list of code lengths.
 *
 * Codes no longer than `tablebits` are expanded directly into `table`.
 * Longer codes get a chain of nodes in decode_data->left / ->right that is
 * entered through `table` and walked one bit at a time by the decoders.
 *
 * @param decode_data  decoder state; left/right are written, status is set on error
 * @param nchar        number of symbols described by `bitlen`
 * @param bitlen       code length per symbol (0 = unused; values >= 17 are rejected)
 * @param tablebits    number of leading code bits used to index `table`
 * @param table        lookup table to fill
 * @param tablesize    number of entries available in `table`
 * @return CL_SUCCESS, or CL_EUNPACK (also stored in status) if the lengths
 *         are inconsistent or an index would leave its array.
 */
static cl_error_t make_table(arj_decode_t *decode_data, int nchar, unsigned char *bitlen, int tablebits,
                             unsigned short *table, int tablesize)
{
    unsigned short count[17], weight[17], start[18], *p;
    unsigned int i, k, len, ch, jutbits, avail, nextcode, mask;

    /* Histogram of code lengths. */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
    }
    for (i = 0; (int)i < nchar; i++) {
        if (bitlen[i] >= 17) {
            cli_dbgmsg("UNARJ: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return CL_EUNPACK;
        }
        count[bitlen[i]]++;
    }

    /* First code of each length, scaled to a 16-bit code space. */
    start[1] = 0;
    for (i = 1; i <= 16; i++) {
        start[i + 1] = start[i] + (count[i] << (16 - i));
    }
    /* (unsigned short)(1 << 16) is 0: the running total must wrap to exactly
     * zero, i.e. the lengths must fill the 16-bit code space completely. */
    if (start[17] != (unsigned short)(1 << 16)) {
        decode_data->status = CL_EUNPACK;
        return CL_EUNPACK;
    }

    /* jutbits: how many code bits do not fit into the table index. */
    jutbits = 16 - tablebits;
    if (tablebits >= 17) {
        cli_dbgmsg("UNARJ: bounds exceeded\n");
        decode_data->status = CL_EUNPACK;
        return CL_EUNPACK;
    }
    /* For lengths that fit the table, rescale start[] to table indices;
     * weight[] is the number of table slots (or code-space units for the
     * longer lengths) one code of that length occupies. */
    for (i = 1; (int)i <= tablebits; i++) {
        start[i] >>= jutbits;
        weight[i] = 1 << (tablebits - i);
    }
    while (i <= 16) {
        weight[i] = 1 << (16 - i);
        i++;
    }

    /* Zero the table slots that are not covered by short codes so the
     * long-code pass below can recognise them as "no node yet". */
    i = start[tablebits + 1] >> jutbits;
    if (i != (unsigned short)(1 << 16)) {
        k = 1 << tablebits;
        while (i != k) {
            if (i >= (unsigned int)tablesize) {
                cli_dbgmsg("UNARJ: bounds exceeded\n");
                decode_data->status = CL_EUNPACK;
                return CL_EUNPACK;
            }
            table[i++] = 0;
        }
    }

    /* Node numbers for long codes start right after the symbol values. */
    avail = nchar;
    mask  = 1 << (15 - tablebits);
    for (ch = 0; (int)ch < nchar; ch++) {
        if ((len = bitlen[ch]) == 0) {
            continue;
        }
        if (len >= 17) {
            cli_dbgmsg("UNARJ: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return CL_EUNPACK;
        }
        k        = start[len];
        nextcode = k + weight[len];
        if ((int)len <= tablebits) {
            /* Short code: fill every table slot that shares its prefix. */
            if (nextcode > (unsigned int)tablesize) {
                decode_data->status = CL_EUNPACK;
                return CL_EUNPACK;
            }
            for (i = start[len]; i < nextcode; i++) {
                table[i] = ch;
            }
        } else {
            /* Long code: descend from the table slot through left/right,
             * allocating nodes on demand, one step per remaining code bit. */
            p = &table[k >> jutbits];
            i = len - tablebits;
            while (i != 0) {
                if (*p == 0) {
                    if (avail >= (2 * NC - 1)) {
                        cli_dbgmsg("UNARJ: bounds exceeded\n");
                        decode_data->status = CL_EUNPACK;
                        return CL_EUNPACK;
                    }
                    decode_data->right[avail] = decode_data->left[avail] = 0;
                    *p                                                   = avail++;
                }
                if (*p >= (2 * NC - 1)) {
                    cli_dbgmsg("UNARJ: bounds exceeded\n");
                    decode_data->status = CL_EUNPACK;
                    return CL_EUNPACK;
                }
                if (k & mask) {
                    p = &decode_data->right[*p];
                } else {
                    p = &decode_data->left[*p];
                }
                k <<= 1;
                i--;
            }
            *p = ch;
        }
        start[len] = nextcode;
    }
    return CL_SUCCESS;
}
340
read_pt_len(arj_decode_t * decode_data,int nn,int nbit,int i_special)341 static cl_error_t read_pt_len(arj_decode_t *decode_data, int nn, int nbit, int i_special)
342 {
343 int i, n;
344 short c;
345 unsigned short mask;
346
347 n = arj_getbits(decode_data, nbit);
348 if (n == 0) {
349 if (nn > NPT) {
350 cli_dbgmsg("UNARJ: bounds exceeded\n");
351 decode_data->status = CL_EUNPACK;
352 return CL_EUNPACK;
353 }
354 c = arj_getbits(decode_data, nbit);
355 for (i = 0; i < nn; i++) {
356 decode_data->pt_len[i] = 0;
357 }
358 for (i = 0; i < 256; i++) {
359 decode_data->pt_table[i] = c;
360 }
361 } else {
362 i = 0;
363 while ((i < n) && (i < NPT)) {
364 c = decode_data->bit_buf >> 13;
365 if (c == 7) {
366 mask = 1 << 12;
367 while (mask & decode_data->bit_buf) {
368 mask >>= 1;
369 c++;
370 }
371 }
372 fill_buf(decode_data, (c < 7) ? 3 : (int)(c - 3));
373 if (decode_data->status != CL_SUCCESS) {
374 return decode_data->status;
375 }
376 decode_data->pt_len[i++] = (unsigned char)c;
377 if (i == i_special) {
378 c = arj_getbits(decode_data, 2);
379 if (decode_data->status != CL_SUCCESS) {
380 return decode_data->status;
381 }
382 while ((--c >= 0) && (i < NPT)) {
383 decode_data->pt_len[i++] = 0;
384 }
385 }
386 }
387 while ((i < nn) && (i < NPT)) {
388 decode_data->pt_len[i++] = 0;
389 }
390 if (make_table(decode_data, nn, decode_data->pt_len, 8, decode_data->pt_table, PTABLESIZE) != CL_SUCCESS) {
391 return CL_EUNPACK;
392 }
393 }
394 return CL_SUCCESS;
395 }
396
read_c_len(arj_decode_t * decode_data)397 static cl_error_t read_c_len(arj_decode_t *decode_data)
398 {
399 short i, c, n;
400 unsigned short mask;
401
402 n = arj_getbits(decode_data, CBIT);
403 if (decode_data->status != CL_SUCCESS) {
404 return decode_data->status;
405 }
406 if (n == 0) {
407 c = arj_getbits(decode_data, CBIT);
408 if (decode_data->status != CL_SUCCESS) {
409 return decode_data->status;
410 }
411 for (i = 0; i < NC; i++) {
412 decode_data->c_len[i] = 0;
413 }
414 for (i = 0; i < CTABLESIZE; i++) {
415 decode_data->c_table[i] = c;
416 }
417 } else {
418 i = 0;
419 while (i < n) {
420 c = decode_data->pt_table[decode_data->bit_buf >> 8];
421 if (c >= NT) {
422 mask = 1 << 7;
423 do {
424 if (c >= (2 * NC - 1)) {
425 cli_dbgmsg("ERROR: bounds exceeded\n");
426 decode_data->status = CL_EFORMAT;
427 return CL_EFORMAT;
428 }
429 if (decode_data->bit_buf & mask) {
430 c = decode_data->right[c];
431 } else {
432 c = decode_data->left[c];
433 }
434 mask >>= 1;
435 } while (c >= NT);
436 }
437 if (c >= 19) {
438 cli_dbgmsg("UNARJ: bounds exceeded\n");
439 decode_data->status = CL_EUNPACK;
440 return CL_EUNPACK;
441 }
442 fill_buf(decode_data, (int)(decode_data->pt_len[c]));
443 if (decode_data->status != CL_SUCCESS) {
444 return decode_data->status;
445 }
446 if (c <= 2) {
447 if (c == 0) {
448 c = 1;
449 } else if (c == 1) {
450 c = arj_getbits(decode_data, 4) + 3;
451 } else {
452 c = arj_getbits(decode_data, CBIT) + 20;
453 }
454 if (decode_data->status != CL_SUCCESS) {
455 return decode_data->status;
456 }
457 while (--c >= 0) {
458 if (i >= NC) {
459 cli_dbgmsg("ERROR: bounds exceeded\n");
460 decode_data->status = CL_EFORMAT;
461 return CL_EFORMAT;
462 }
463 decode_data->c_len[i++] = 0;
464 }
465 } else {
466 if (i >= NC) {
467 cli_dbgmsg("ERROR: bounds exceeded\n");
468 decode_data->status = CL_EFORMAT;
469 return CL_EFORMAT;
470 }
471 decode_data->c_len[i++] = (unsigned char)(c - 2);
472 }
473 }
474 while (i < NC) {
475 decode_data->c_len[i++] = 0;
476 }
477 if (make_table(decode_data, NC, decode_data->c_len, 12, decode_data->c_table, CTABLESIZE) != CL_SUCCESS) {
478 return CL_EUNPACK;
479 }
480 }
481 return CL_SUCCESS;
482 }
483
/*
 * Decode the next literal/length symbol.
 *
 * When the current block is exhausted, the block size and the three tables
 * are read first; the results of those reads are not inspected here (errors
 * are left in decode_data->status for the caller).
 *
 * @return the symbol (< NC), or 0 with status set to CL_EUNPACK if a node
 *         index leaves the left/right arrays.
 */
static uint16_t decode_c(arj_decode_t *decode_data)
{
    uint16_t sym, bit;

    if (decode_data->blocksize == 0) {
        decode_data->blocksize = arj_getbits(decode_data, 16);
        read_pt_len(decode_data, NT, TBIT, 3);
        read_c_len(decode_data);
        read_pt_len(decode_data, NT, PBIT, -1);
    }
    decode_data->blocksize--;

    /* Table lookup on the top 12 bits, then one left/right step per extra bit. */
    sym = decode_data->c_table[decode_data->bit_buf >> 4];
    for (bit = 1 << 3; sym >= NC; bit >>= 1) {
        if (sym >= (2 * NC - 1)) {
            cli_dbgmsg("ERROR: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return 0;
        }
        sym = (decode_data->bit_buf & bit) ? decode_data->right[sym] : decode_data->left[sym];
    }

    fill_buf(decode_data, (int)(decode_data->c_len[sym]));
    return sym;
}
515
/*
 * Decode a match position.
 *
 * A symbol is looked up through pt_table (walking left/right for long
 * codes).  Symbol 0 is position 0; any other symbol s yields
 * (1 << (s - 1)) plus the next (s - 1) bits of the stream.
 *
 * @return the position, or 0 with status set to CL_EUNPACK if a node index
 *         leaves the left/right arrays.
 */
static uint16_t decode_p(arj_decode_t *decode_data)
{
    unsigned short sym, bit;

    sym = decode_data->pt_table[decode_data->bit_buf >> 8];
    for (bit = 1 << 7; sym >= NP; bit >>= 1) {
        if (sym >= (2 * NC - 1)) {
            cli_dbgmsg("ERROR: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return 0;
        }
        sym = (decode_data->bit_buf & bit) ? decode_data->right[sym] : decode_data->left[sym];
    }
    fill_buf(decode_data, (int)(decode_data->pt_len[sym]));

    if (sym == 0) {
        return sym;
    }

    sym--;
    sym = (1 << sym) + arj_getbits(decode_data, (int)sym);
    return sym;
}
544
decode(arj_metadata_t * metadata)545 static cl_error_t decode(arj_metadata_t *metadata)
546 {
547 cl_error_t ret;
548
549 arj_decode_t decode_data;
550 uint32_t count = 0, out_ptr = 0;
551 int16_t chr, i, j;
552
553 memset(&decode_data, 0, sizeof(decode_data));
554 decode_data.text = (unsigned char *)cli_calloc(DDICSIZ, 1);
555 if (!decode_data.text) {
556 return CL_EMEM;
557 }
558 decode_data.map = metadata->map;
559 decode_data.offset = metadata->offset;
560 decode_data.comp_size = metadata->comp_size;
561 ret = decode_start(&decode_data);
562 if (ret != CL_SUCCESS) {
563 free(decode_data.text);
564 metadata->offset = decode_data.offset;
565 return ret;
566 }
567 decode_data.status = CL_SUCCESS;
568
569 while (count < metadata->orig_size) {
570 if ((chr = decode_c(&decode_data)) <= UCHAR_MAX) {
571 decode_data.text[out_ptr] = (unsigned char)chr;
572 count++;
573 if (++out_ptr >= DDICSIZ) {
574 out_ptr = 0;
575 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
576 free(decode_data.text);
577 metadata->offset = decode_data.offset;
578 return CL_EWRITE;
579 }
580 }
581 } else {
582 j = chr - (UCHAR_MAX + 1 - THRESHOLD);
583 count += j;
584 i = decode_p(&decode_data);
585 if ((i = out_ptr - i - 1) < 0) {
586 i += DDICSIZ;
587 }
588 if ((i >= DDICSIZ) || (i < 0)) {
589 cli_dbgmsg("UNARJ: bounds exceeded - probably a corrupted file.\n");
590 break;
591 }
592 if (out_ptr > (uint32_t)i && out_ptr < DDICSIZ - MAXMATCH - 1) {
593 while ((--j >= 0) && (i < DDICSIZ) && (out_ptr < DDICSIZ)) {
594 decode_data.text[out_ptr++] = decode_data.text[i++];
595 }
596 } else {
597 while (--j >= 0) {
598 decode_data.text[out_ptr] = decode_data.text[i];
599 if (++out_ptr >= DDICSIZ) {
600 out_ptr = 0;
601 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
602 free(decode_data.text);
603 metadata->offset = decode_data.offset;
604 return CL_EWRITE;
605 }
606 }
607 if (++i >= DDICSIZ) {
608 i = 0;
609 }
610 }
611 }
612 }
613 if (decode_data.status != CL_SUCCESS) {
614 free(decode_data.text);
615 metadata->offset = decode_data.offset;
616 return decode_data.status;
617 }
618 }
619 if (out_ptr != 0) {
620 write_text(metadata->ofd, decode_data.text, out_ptr);
621 }
622
623 free(decode_data.text);
624 metadata->offset = decode_data.offset;
625 return CL_SUCCESS;
626 }
627
/*
 * Bit-reader macros for the decode_f() path.  `dd` is a pointer to an
 * arj_decode_t; getbuf holds bits ready for consumption (most significant
 * first) and getlen counts how many of them are valid.
 *
 * Each macro expands to a single do { } while (0) statement so it is safe in
 * an unbraced if/else, and every argument is parenthesized.  As before, `dd`
 * and `l` are evaluated more than once, so pass expressions without side
 * effects.
 */

/* Top getbuf up to CODE_BIT valid bits from the look-ahead buffer. */
#define ARJ_BFIL(dd)                                   \
    do {                                               \
        (dd)->getbuf |= (dd)->bit_buf >> (dd)->getlen; \
        fill_buf((dd), CODE_BIT - (dd)->getlen);       \
        (dd)->getlen = CODE_BIT;                       \
    } while (0)

/* Store the next single bit (0 or 1) in c. */
#define ARJ_GETBIT(dd, c)                   \
    do {                                    \
        if ((dd)->getlen <= 0) {            \
            ARJ_BFIL(dd);                   \
        }                                   \
        (c) = ((dd)->getbuf & 0x8000) != 0; \
        (dd)->getbuf *= 2;                  \
        (dd)->getlen--;                     \
    } while (0)

/* Drop l bits from getbuf.  Locals carry a prefix so they cannot capture
 * an identifier used in the caller's `l` expression. */
#define ARJ_BPUL(dd, l)                                                  \
    do {                                                                 \
        int arj_bpul_i;                                                  \
        int arj_bpul_n = (l);                                            \
        for (arj_bpul_i = 0; arj_bpul_i < arj_bpul_n; arj_bpul_i++) {    \
            (dd)->getbuf *= 2;                                           \
        }                                                                \
        (dd)->getlen -= arj_bpul_n;                                      \
    } while (0)

/* Store the next l bits in c and consume them. */
#define ARJ_GETBITS(dd, c, l)                                 \
    do {                                                      \
        if ((dd)->getlen < (l)) {                             \
            ARJ_BFIL(dd);                                     \
        }                                                     \
        (c) = (uint16_t)(dd)->getbuf >> (CODE_BIT - (l));     \
        ARJ_BPUL(dd, l);                                      \
    } while (0)
656
/*
 * Read a match position for decode_f().
 *
 * Prefix bits are read one at a time: each 1 bit adds the current step to
 * the base and widens the field by one bit, starting at STRTP bits and
 * stopping at STOPP.  The field itself is then read and added to the base.
 */
static uint16_t decode_ptr(arj_decode_t *decode_data)
{
    uint16_t value;
    uint16_t nbits = STRTP;
    uint16_t base  = 0;
    uint16_t step  = 1 << STRTP;

    while (nbits < STOPP) {
        ARJ_GETBIT(decode_data, value);
        if (value == 0) {
            break;
        }
        base += step;
        step <<= 1;
        nbits++;
    }
    if (nbits != 0) {
        ARJ_GETBITS(decode_data, value, nbits);
    }
    return (uint16_t)(value + base);
}
677
/*
 * Read a length code for decode_f().
 *
 * Prefix bits are read one at a time: each 1 bit adds the current step to
 * the base and widens the field by one bit, starting at STRTL bits and
 * stopping at STOPL.  If the field width ends up non-zero the field is read
 * and added to the base.  A result of 0 tells decode_f() that a literal
 * byte follows.
 */
static uint16_t decode_len(arj_decode_t *decode_data)
{
    uint16_t value;
    uint16_t nbits = STRTL;
    uint16_t base  = 0;
    uint16_t step  = 1 << STRTL;

    while (nbits < STOPL) {
        ARJ_GETBIT(decode_data, value);
        if (value == 0) {
            break;
        }
        base += step;
        step <<= 1;
        nbits++;
    }
    if (nbits != 0) {
        ARJ_GETBITS(decode_data, value, nbits);
    }
    return (uint16_t)(value + base);
}
698
decode_f(arj_metadata_t * metadata)699 static cl_error_t decode_f(arj_metadata_t *metadata)
700 {
701 cl_error_t ret;
702
703 arj_decode_t decode_data, *dd;
704 uint32_t count = 0, out_ptr = 0;
705 int16_t chr, i, j, pos;
706
707 dd = &decode_data;
708 memset(&decode_data, 0, sizeof(decode_data));
709 decode_data.text = (unsigned char *)cli_calloc(DDICSIZ, 1);
710 if (!decode_data.text) {
711 return CL_EMEM;
712 }
713 decode_data.map = metadata->map;
714 decode_data.offset = metadata->offset;
715 decode_data.comp_size = metadata->comp_size;
716 ret = init_getbits(&decode_data);
717 if (ret != CL_SUCCESS) {
718 free(decode_data.text);
719 metadata->offset = decode_data.offset;
720 return ret;
721 }
722 decode_data.getlen = decode_data.getbuf = 0;
723 decode_data.status = CL_SUCCESS;
724
725 while (count < metadata->orig_size) {
726 chr = decode_len(&decode_data);
727 if (decode_data.status != CL_SUCCESS) {
728 free(decode_data.text);
729 metadata->offset = decode_data.offset;
730 return decode_data.status;
731 }
732 if (chr == 0) {
733 ARJ_GETBITS(dd, chr, CHAR_BIT);
734 if (decode_data.status != CL_SUCCESS) {
735 free(decode_data.text);
736 metadata->offset = decode_data.offset;
737 return decode_data.status;
738 }
739 decode_data.text[out_ptr] = (unsigned char)chr;
740 count++;
741 if (++out_ptr >= DDICSIZ) {
742 out_ptr = 0;
743 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
744 free(decode_data.text);
745 metadata->offset = decode_data.offset;
746 return CL_EWRITE;
747 }
748 }
749 } else {
750 j = chr - 1 + THRESHOLD;
751 count += j;
752 pos = decode_ptr(&decode_data);
753 if (decode_data.status != CL_SUCCESS) {
754 free(decode_data.text);
755 metadata->offset = decode_data.offset;
756 return decode_data.status;
757 }
758 if ((i = out_ptr - pos - 1) < 0) {
759 i += DDICSIZ;
760 }
761 if ((i >= DDICSIZ) || (i < 0)) {
762 cli_dbgmsg("UNARJ: bounds exceeded - probably a corrupted file.\n");
763 break;
764 }
765 while (j-- > 0) {
766 decode_data.text[out_ptr] = decode_data.text[i];
767 if (++out_ptr >= DDICSIZ) {
768 out_ptr = 0;
769 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
770 free(decode_data.text);
771 metadata->offset = decode_data.offset;
772 return CL_EWRITE;
773 }
774 }
775 if (++i >= DDICSIZ) {
776 i = 0;
777 }
778 }
779 }
780 }
781 if (out_ptr != 0) {
782 write_text(metadata->ofd, decode_data.text, out_ptr);
783 }
784
785 free(decode_data.text);
786 metadata->offset = decode_data.offset;
787 return CL_SUCCESS;
788 }
789
arj_unstore(arj_metadata_t * metadata,int ofd,uint32_t len)790 static cl_error_t arj_unstore(arj_metadata_t *metadata, int ofd, uint32_t len)
791 {
792 const unsigned char *data;
793 uint32_t rem;
794 unsigned int todo;
795 size_t count;
796
797 cli_dbgmsg("in arj_unstore\n");
798 rem = len;
799
800 while (rem > 0) {
801 todo = (unsigned int)MIN(8192, rem);
802 data = fmap_need_off_once_len(metadata->map, metadata->offset, todo, &count);
803 if (!data || !count) {
804 /* Truncated file, not enough bytes available */
805 return CL_EFORMAT;
806 }
807 metadata->offset += count;
808 if (cli_writen(ofd, data, count) != count) {
809 /* File writing problem */
810 return CL_EWRITE;
811 }
812 rem -= count;
813 }
814 return CL_SUCCESS;
815 }
816
/*
 * Check for the two-byte ARJ marker (0x60 0xea) at metadata->offset.
 * When the two bytes can be read the offset is advanced past them, whether
 * or not they match.
 *
 * @return TRUE if the marker is present, FALSE otherwise.
 */
static int is_arj_archive(arj_metadata_t *metadata)
{
    const char header_id[2] = {0x60, 0xea};
    const char *mark        = fmap_need_off_once(metadata->map, metadata->offset, 2);

    if (mark == NULL) {
        return FALSE;
    }
    metadata->offset += 2;

    if (memcmp(mark, header_id, 2) != 0) {
        cli_dbgmsg("Not an ARJ archive\n");
        return FALSE;
    }
    return TRUE;
}
832
/*
 * Parse the ARJ main (archive) header that follows the archive marker and
 * leave metadata->offset just past it, including any extended headers.
 *
 * The archive name and comment are only normalized and logged; nothing is
 * stored from them.
 *
 * @param metadata  supplies the map and the current offset
 * @return TRUE on success, FALSE if the header is missing, inconsistent or
 *         does not fit inside the map.
 */
static int arj_read_main_header(arj_metadata_t *metadata)
{
    uint16_t header_size, count;
    arj_main_hdr_t main_hdr;
    const char *filename = NULL;
    const char *comment  = NULL;
    struct text_norm_state fnstate, comstate;
    unsigned char *fnnorm  = NULL;
    unsigned char *comnorm = NULL;
    int ret                = TRUE; /* matches the function's return type */

    size_t filename_max_len = 0;
    size_t filename_len     = 0;
    size_t comment_max_len  = 0;
    size_t comment_len      = 0;
    size_t orig_offset      = metadata->offset;

    if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
        return FALSE;

    metadata->offset += 2;
    header_size = le16_to_host(header_size);
    cli_dbgmsg("Header Size: %d\n", header_size);
    if (header_size == 0) {
        /* End of archive */
        ret = FALSE;
        goto done;
    }
    if (header_size > HEADERSIZE_MAX) {
        cli_dbgmsg("arj_read_header: invalid header_size: %u\n ", header_size);
        ret = FALSE;
        goto done;
    }
    if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
        cli_dbgmsg("arj_read_header: invalid header_size: %u, exceeds length of file.\n", header_size);
        ret = FALSE;
        goto done;
    }
    if (fmap_readn(metadata->map, &main_hdr, metadata->offset, 30) != 30) {
        ret = FALSE;
        goto done;
    }
    metadata->offset += 30;

    cli_dbgmsg("ARJ Main File Header\n");
    cli_dbgmsg("First Header Size: %d\n", main_hdr.first_hdr_size);
    cli_dbgmsg("Version: %d\n", main_hdr.version);
    cli_dbgmsg("Min version: %d\n", main_hdr.min_version);
    cli_dbgmsg("Host OS: %d\n", main_hdr.host_os);
    cli_dbgmsg("Flags: 0x%x\n", main_hdr.flags);
    cli_dbgmsg("Security version: %d\n", main_hdr.security_version);
    cli_dbgmsg("File type: %d\n", main_hdr.file_type);

    if (main_hdr.first_hdr_size < 30) {
        cli_dbgmsg("Format error. First Header Size < 30\n");
        ret = FALSE;
        goto done;
    }
    if (main_hdr.first_hdr_size > 30) {
        metadata->offset += main_hdr.first_hdr_size - 30;
    }

    /* Bytes of the header left for the name.  The subtraction wraps to a
     * huge value when first_hdr_size overshoots, which the check catches. */
    filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
    if (filename_max_len > header_size) {
        cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
        ret = FALSE;
        goto done;
    }
    if (filename_max_len > 0) {
        fnnorm   = cli_calloc(sizeof(unsigned char), filename_max_len + 1);
        filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
        if (!filename || !fnnorm) {
            cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
            ret = FALSE;
            goto done;
        }
        filename_len = CLI_STRNLEN(filename, filename_max_len);
    }
    metadata->offset += filename_len + 1;

    comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
    if (comment_max_len > header_size) {
        cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
        ret = FALSE;
        goto done;
    }
    if (comment_max_len > 0) {
        comnorm = cli_calloc(sizeof(unsigned char), comment_max_len + 1);
        comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
        if (!comment || !comnorm) {
            cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
            ret = FALSE;
            goto done;
        }
        comment_len = CLI_STRNLEN(comment, comment_max_len);
    }
    metadata->offset += comment_len + 1;

    text_normalize_init(&fnstate, fnnorm, filename_max_len);
    text_normalize_init(&comstate, comnorm, comment_max_len);

    text_normalize_buffer(&fnstate, (const unsigned char *)filename, filename_len);
    text_normalize_buffer(&comstate, (const unsigned char *)comment, comment_len);

    /* fnnorm / comnorm stay NULL when their area is empty; passing NULL to
     * %s is undefined behavior, so log an empty string instead. */
    cli_dbgmsg("Filename: %s\n", fnnorm ? (const char *)fnnorm : "");
    cli_dbgmsg("Comment: %s\n", comnorm ? (const char *)comnorm : "");

    metadata->offset += 4; /* crc */
    /* Skip past any extended header data */
    for (;;) {
        const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
        if (!countp) {
            ret = FALSE;
            goto done;
        }
        count = cli_readint16(countp);
        metadata->offset += 2;
        cli_dbgmsg("Extended header size: %d\n", count);
        if (count == 0) {
            break;
        }
        /* Skip extended header + 4byte CRC */
        metadata->offset += count + 4;
    }

done:

    if (fnnorm) {
        free(fnnorm);
        fnnorm = NULL;
    }

    if (comnorm) {
        free(comnorm);
        comnorm = NULL;
    }
    return ret;
}
971
arj_read_file_header(arj_metadata_t * metadata)972 static cl_error_t arj_read_file_header(arj_metadata_t *metadata)
973 {
974 uint16_t header_size, count;
975 const char *filename, *comment;
976 arj_file_hdr_t file_hdr;
977 struct text_norm_state fnstate, comstate;
978 unsigned char *fnnorm = NULL;
979 unsigned char *comnorm = NULL;
980 cl_error_t ret = CL_SUCCESS;
981
982 size_t filename_max_len = 0;
983 size_t filename_len = 0;
984 size_t comment_max_len = 0;
985 size_t comment_len = 0;
986 size_t orig_offset = metadata->offset;
987
988 if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
989 return CL_EFORMAT;
990 header_size = le16_to_host(header_size);
991 metadata->offset += 2;
992
993 cli_dbgmsg("Header Size: %d\n", header_size);
994 if (header_size == 0) {
995 /* End of archive */
996 ret = CL_BREAK;
997 goto done;
998 }
999 if (header_size > HEADERSIZE_MAX) {
1000 cli_dbgmsg("arj_read_file_header: invalid header_size: %u\n ", header_size);
1001 ret = CL_EFORMAT;
1002 goto done;
1003 }
1004 if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
1005 cli_dbgmsg("arj_read_file_header: invalid header_size: %u, exceeds length of file.\n", header_size);
1006 ret = CL_EFORMAT;
1007 goto done;
1008 }
1009 if (fmap_readn(metadata->map, &file_hdr, metadata->offset, 30) != 30) {
1010 ret = CL_EFORMAT;
1011 goto done;
1012 }
1013 metadata->offset += 30;
1014 file_hdr.comp_size = le32_to_host(file_hdr.comp_size);
1015 file_hdr.orig_size = le32_to_host(file_hdr.orig_size);
1016
1017 cli_dbgmsg("ARJ File Header\n");
1018 cli_dbgmsg("First Header Size: %d\n", file_hdr.first_hdr_size);
1019 cli_dbgmsg("Version: %d\n", file_hdr.version);
1020 cli_dbgmsg("Min version: %d\n", file_hdr.min_version);
1021 cli_dbgmsg("Host OS: %d\n", file_hdr.host_os);
1022 cli_dbgmsg("Flags: 0x%x\n", file_hdr.flags);
1023 cli_dbgmsg("Method: %d\n", file_hdr.method);
1024 cli_dbgmsg("File type: %d\n", file_hdr.file_type);
1025 cli_dbgmsg("File type: %d\n", file_hdr.password_mod);
1026 cli_dbgmsg("Compressed size: %u\n", file_hdr.comp_size);
1027 cli_dbgmsg("Original size: %u\n", file_hdr.orig_size);
1028
1029 if (file_hdr.first_hdr_size < 30) {
1030 cli_dbgmsg("Format error. First Header Size < 30\n");
1031 ret = CL_EFORMAT;
1032 goto done;
1033 }
1034
1035 /* Note: this skips past any extended file start position data (multi-volume) */
1036 if (file_hdr.first_hdr_size > 30) {
1037 metadata->offset += file_hdr.first_hdr_size - 30;
1038 }
1039
1040 filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
1041 if (filename_max_len > header_size) {
1042 cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
1043 ret = CL_EFORMAT;
1044 goto done;
1045 }
1046 if (filename_max_len > 0) {
1047 fnnorm = cli_calloc(sizeof(unsigned char), filename_max_len + 1);
1048 if (!fnnorm) {
1049 cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
1050 ret = CL_EMEM;
1051 goto done;
1052 }
1053 filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
1054 if (!filename) {
1055 cli_dbgmsg("UNARJ: Filename is out of file\n");
1056 ret = CL_EFORMAT;
1057 goto done;
1058 }
1059 filename_len = CLI_STRNLEN(filename, filename_max_len);
1060 }
1061 metadata->offset += filename_len + 1;
1062
1063 comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
1064 if (comment_max_len > header_size) {
1065 cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
1066 ret = CL_EFORMAT;
1067 goto done;
1068 }
1069 if (comment_max_len > 0) {
1070 comnorm = cli_calloc(sizeof(unsigned char), comment_max_len + 1);
1071 if (!comnorm) {
1072 cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
1073 ret = CL_EMEM;
1074 goto done;
1075 }
1076 comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
1077 if (!comment) {
1078 cli_dbgmsg("UNARJ: comment is out of file\n");
1079 ret = CL_EFORMAT;
1080 goto done;
1081 }
1082 comment_len += CLI_STRNLEN(comment, comment_max_len);
1083 }
1084 metadata->offset += comment_len + 1;
1085
1086 text_normalize_init(&fnstate, fnnorm, filename_max_len);
1087 text_normalize_init(&comstate, comnorm, comment_max_len);
1088
1089 text_normalize_buffer(&fnstate, (const unsigned char *)filename, filename_len);
1090 text_normalize_buffer(&comstate, (const unsigned char *)comment, comment_len);
1091
1092 cli_dbgmsg("Filename: %s\n", fnnorm);
1093 cli_dbgmsg("Comment: %s\n", comnorm);
1094 metadata->filename = CLI_STRNDUP(filename, filename_len);
1095
1096 /* Skip CRC */
1097 metadata->offset += 4;
1098
1099 /* Skip past any extended header data */
1100 for (;;) {
1101 const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
1102 if (!countp) {
1103 if (metadata->filename)
1104 free(metadata->filename);
1105 metadata->filename = NULL;
1106 ret = CL_EFORMAT;
1107 goto done;
1108 }
1109 count = cli_readint16(countp);
1110 metadata->offset += 2;
1111 cli_dbgmsg("Extended header size: %d\n", count);
1112 if (count == 0) {
1113 break;
1114 }
1115 /* Skip extended header + 4byte CRC */
1116 metadata->offset += count + 4;
1117 }
1118 metadata->comp_size = file_hdr.comp_size;
1119 metadata->orig_size = file_hdr.orig_size;
1120 metadata->method = file_hdr.method;
1121 metadata->encrypted = ((file_hdr.flags & GARBLE_FLAG) != 0) ? TRUE : FALSE;
1122 metadata->ofd = -1;
1123 if (!metadata->filename) {
1124 ret = CL_EMEM;
1125 goto done;
1126 }
1127
1128 done:
1129
1130 if (fnnorm) {
1131 free(fnnorm);
1132 fnnorm = NULL;
1133 }
1134
1135 if (comnorm) {
1136 free(comnorm);
1137 comnorm = NULL;
1138 }
1139 return ret;
1140 }
1141
cli_unarj_open(fmap_t * map,const char * dirname,arj_metadata_t * metadata)1142 cl_error_t cli_unarj_open(fmap_t *map, const char *dirname, arj_metadata_t *metadata)
1143 {
1144 UNUSEDPARAM(dirname);
1145 cli_dbgmsg("in cli_unarj_open\n");
1146 metadata->map = map;
1147 metadata->offset = 0;
1148 if (!is_arj_archive(metadata)) {
1149 cli_dbgmsg("Not in ARJ format\n");
1150 return CL_EFORMAT;
1151 }
1152 if (!arj_read_main_header(metadata)) {
1153 cli_dbgmsg("Failed to read main header\n");
1154 return CL_EFORMAT;
1155 }
1156 return CL_SUCCESS;
1157 }
1158
cli_unarj_prepare_file(const char * dirname,arj_metadata_t * metadata)1159 cl_error_t cli_unarj_prepare_file(const char *dirname, arj_metadata_t *metadata)
1160 {
1161 cli_dbgmsg("in cli_unarj_prepare_file\n");
1162 if (!metadata || !dirname) {
1163 return CL_ENULLARG;
1164 }
1165 /* Each file is preceded by the ARJ file marker */
1166 if (!is_arj_archive(metadata)) {
1167 cli_dbgmsg("Not in ARJ format\n");
1168 return CL_EFORMAT;
1169 }
1170 return arj_read_file_header(metadata);
1171 }
1172
cli_unarj_extract_file(const char * dirname,arj_metadata_t * metadata)1173 cl_error_t cli_unarj_extract_file(const char *dirname, arj_metadata_t *metadata)
1174 {
1175 cl_error_t ret = CL_SUCCESS;
1176 char filename[1024];
1177
1178 cli_dbgmsg("in cli_unarj_extract_file\n");
1179 if (!metadata || !dirname) {
1180 return CL_ENULLARG;
1181 }
1182
1183 if (metadata->encrypted) {
1184 cli_dbgmsg("PASSWORDed file (skipping)\n");
1185 metadata->offset += metadata->comp_size;
1186 cli_dbgmsg("Target offset: %lu\n", (unsigned long int)metadata->offset);
1187 return CL_SUCCESS;
1188 }
1189
1190 snprintf(filename, 1024, "%s" PATHSEP "file.uar", dirname);
1191 cli_dbgmsg("Filename: %s\n", filename);
1192 metadata->ofd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0600);
1193 if (metadata->ofd < 0) {
1194 return CL_EOPEN;
1195 }
1196 switch (metadata->method) {
1197 case 0:
1198 ret = arj_unstore(metadata, metadata->ofd, metadata->comp_size);
1199 break;
1200 case 1:
1201 case 2:
1202 case 3:
1203 ret = decode(metadata);
1204 break;
1205 case 4:
1206 ret = decode_f(metadata);
1207 break;
1208 default:
1209 ret = CL_EFORMAT;
1210 break;
1211 }
1212 return ret;
1213 }
1214