1 /*
2  *  Extract component parts of ARJ archives.
3  *
4  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  *  Copyright (C) 2007-2013 Sourcefire, Inc.
6  *
7  *  Authors: Trog
8  *
9  *  This program is free software; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License version 2 as
11  *  published by the Free Software Foundation.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, write to the Free Software
20  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21  *  MA 02110-1301, USA.
22  */
23 
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27 
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #ifdef HAVE_UNISTD_H
32 #include <unistd.h>
33 #endif
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 #include <ctype.h>
38 
39 #include "clamav.h"
40 #include "str.h"
41 #include "others.h"
42 #include "unarj.h"
43 #include "textnorm.h"
44 
/* Size in bytes of the fixed leading part of an ARJ header. */
#define FIRST_HDR_SIZE 30
/* Components of the header size limit below. */
#define COMMENT_MAX 2048
#define FNAME_MAX 512
/* Largest header_size value the header reader accepts. */
#define HEADERSIZE_MAX (FIRST_HDR_SIZE + 10 + FNAME_MAX + COMMENT_MAX)
/* NP, the number of distance codes, is derived from this. */
#define MAXDICBIT 16
/* Size in bytes of the output window (the `text` buffer) used by the decoders. */
#define DDICSIZ 26624
/* Offset used when converting a decoded symbol into a match length. */
#define THRESHOLD 3
#ifndef UCHAR_MAX
#define UCHAR_MAX (255)
#endif
#ifndef CHAR_BIT
#define CHAR_BIT (8)
#endif
/* Used together with THRESHOLD to size the literal/length alphabet (NC). */
#define MAXMATCH 256
#ifndef FALSE
#define FALSE (0)
#define TRUE (1)
#endif

/* Width in bits of the bit buffers (bit_buf / getbuf). */
#define CODE_BIT 16
/* Number of code-length entries requested from read_pt_len(). */
#define NT (CODE_BIT + 3)
/* Field widths, in bits, of the count read at the start of each length table. */
#define PBIT 5
#define TBIT 5
/* Number of literal/length symbols (size of c_len). */
#define NC (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)
/* Number of distance symbols; decode_p() treats values >= NP as tree nodes. */
#define NP (MAXDICBIT + 1)
#define CBIT 9
/* Entry counts of the direct lookup tables c_table and pt_table. */
#define CTABLESIZE 4096
#define PTABLESIZE 256
/* Start/stop bit widths used by decode_ptr(). */
#define STRTP 9
#define STOPP 13

/* Start/stop bit widths used by decode_len(). */
#define STRTL 0
#define STOPL 7

/* NPT is the larger of NT and NP; it sizes pt_len. */
#if NT > NP
#define NPT NT
#else
#define NPT NP
#endif

/* Bit in the header flags byte; NOTE(review): presumably marks garbled (encrypted) data - confirm at the use site. */
#define GARBLE_FLAG 0x01
86 
87 #ifndef HAVE_ATTRIB_PACKED
88 #define __attribute__(x)
89 #endif
90 
91 #ifdef HAVE_PRAGMA_PACK
92 #pragma pack(1)
93 #endif
94 
95 #ifdef HAVE_PRAGMA_PACK_HPPA
96 #pragma pack 1
97 #endif
98 
/*
 * Fixed 30-byte leading part of the ARJ main (archive) header.
 * The header reader copies 30 raw bytes from the file map straight into this
 * structure, so the layout must stay packed and the field order must not
 * change.
 * NOTE(review): multi-byte fields are presumably little-endian on disk and
 * are not byte-swapped by the code that fills this structure - confirm
 * before using them on big-endian hosts.
 */
typedef struct arj_main_hdr_tag {
    uint8_t first_hdr_size; /* the reader rejects values below 30 and skips any bytes beyond 30 */
    uint8_t version;
    uint8_t min_version;
    uint8_t host_os;
    uint8_t flags;
    uint8_t security_version;
    uint8_t file_type;
    uint8_t pad;
    uint32_t time_created __attribute__((packed));
    uint32_t time_modified __attribute__((packed));
    uint32_t archive_size __attribute__((packed));
    uint32_t sec_env_file_position __attribute__((packed));
    uint16_t entryname_pos __attribute__((packed));
    uint16_t sec_trail_size __attribute__((packed));
    uint16_t host_data __attribute__((packed));
} arj_main_hdr_t;
116 
/*
 * Fixed leading part of an ARJ per-file header. It has the same total size
 * and packing as arj_main_hdr_t, but carries per-file fields (compression
 * method, compressed/original size, CRC).
 * NOTE(review): the code that fills this structure is not in this part of
 * the file; presumably it is read as raw bytes like the main header - confirm.
 */
typedef struct arj_file_hdr_tag {
    uint8_t first_hdr_size; /* must be 30 bytes */
    uint8_t version;
    uint8_t min_version;
    uint8_t host_os;
    uint8_t flags;
    uint8_t method;
    uint8_t file_type;
    uint8_t password_mod;
    uint32_t time_modified __attribute__((packed));
    uint32_t comp_size __attribute__((packed));
    uint32_t orig_size __attribute__((packed));
    uint32_t orig_crc __attribute__((packed));
    uint16_t entryname_pos __attribute__((packed));
    uint16_t file_mode __attribute__((packed));
    uint16_t host_data __attribute__((packed));
} arj_file_hdr_t;
134 
135 #ifdef HAVE_PRAGMA_PACK
136 #pragma pack()
137 #endif
138 
139 #ifdef HAVE_PRAGMA_PACK_HPPA
140 #pragma pack
141 #endif
142 
/*
 * Working state shared by the bit reader, the table builders and the two
 * decompressors (decode() and decode_f()).
 */
typedef struct arj_decode_tag {
    unsigned char *text;          /* output window of DDICSIZ bytes, allocated by decode()/decode_f() */
    fmap_t *map;                  /* file map the compressed data is read from */
    size_t offset;                /* current read position in map; advanced once per consumed byte */
    const uint8_t *buf;           /* next unread byte of the chunk last obtained from map */
    const void *bufend;           /* one past the end of that chunk */
    uint16_t blocksize;           /* symbols left in the current block; 0 makes decode_c() read new tables */
    uint16_t bit_buf;             /* 16-bit look-ahead of the bit stream */
    int bit_count;                /* number of bits of sub_bit_buf not yet moved into bit_buf */
    uint32_t comp_size;           /* compressed bytes still available; once 0, fill_buf() feeds zero bytes */
    int16_t getlen, getbuf;       /* secondary bit buffer and its fill level, used by the ARJ_* macros */
    uint16_t left[2 * NC - 1];    /* child links of the code trees built by make_table() */
    uint16_t right[2 * NC - 1];
    unsigned char c_len[NC];      /* code length of each literal/length symbol */
    uint16_t c_table[CTABLESIZE]; /* direct lookup table for literal/length codes */
    unsigned char pt_len[NPT];    /* code lengths read by read_pt_len() */
    unsigned char sub_bit_buf;    /* last byte fetched from the input */
    uint16_t pt_table[PTABLESIZE]; /* direct lookup table built from pt_len */
    int status;                   /* sticky error code; CL_SUCCESS while decoding is healthy */
} arj_decode_t;
163 
fill_buf(arj_decode_t * decode_data,int n)164 static cl_error_t fill_buf(arj_decode_t *decode_data, int n)
165 {
166     if (decode_data->status == CL_EFORMAT)
167         return CL_EFORMAT;
168     if (((uint64_t)decode_data->bit_buf) * (n > 0 ? 2 << (n - 1) : 0) > UINT32_MAX)
169         return CL_EFORMAT;
170     decode_data->bit_buf = (((uint64_t)decode_data->bit_buf) << n) & 0xFFFF;
171     while (n > decode_data->bit_count) {
172         decode_data->bit_buf |= decode_data->sub_bit_buf << (n -= decode_data->bit_count);
173         if (decode_data->comp_size != 0) {
174             decode_data->comp_size--;
175             if (decode_data->buf == decode_data->bufend) {
176                 size_t len;
177                 decode_data->buf = fmap_need_off_once_len(decode_data->map, decode_data->offset, 8192, &len);
178                 if (!decode_data->buf || !len) {
179                     /* the file is most likely corrupted, so
180 				 * we return CL_EFORMAT instead of CL_EREAD
181 				 */
182                     decode_data->status = CL_EFORMAT;
183                     return CL_EFORMAT;
184                 }
185                 decode_data->bufend = decode_data->buf + len;
186             }
187             decode_data->sub_bit_buf = *decode_data->buf++;
188             decode_data->offset++;
189         } else {
190             decode_data->sub_bit_buf = 0;
191         }
192         decode_data->bit_count = CHAR_BIT;
193     }
194     decode_data->bit_buf |= decode_data->sub_bit_buf >> (decode_data->bit_count -= n);
195     return CL_SUCCESS;
196 }
197 
init_getbits(arj_decode_t * decode_data)198 static cl_error_t init_getbits(arj_decode_t *decode_data)
199 {
200     decode_data->bit_buf     = 0;
201     decode_data->sub_bit_buf = 0;
202     decode_data->bit_count   = 0;
203     return fill_buf(decode_data, 2 * CHAR_BIT);
204 }
205 
/*
 * Return the top n bits of the look-ahead buffer and then consume them.
 * The result of fill_buf() is not propagated; callers that care inspect
 * decode_data->status afterwards.
 */
static unsigned short arj_getbits(arj_decode_t *decode_data, int n)
{
    const int unused_bits = 2 * CHAR_BIT - n;
    unsigned short value  = decode_data->bit_buf >> unused_bits;

    fill_buf(decode_data, n);
    return value;
}
214 
decode_start(arj_decode_t * decode_data)215 static cl_error_t decode_start(arj_decode_t *decode_data)
216 {
217     decode_data->blocksize = 0;
218     return init_getbits(decode_data);
219 }
220 
write_text(int ofd,unsigned char * data,size_t length)221 static cl_error_t write_text(int ofd, unsigned char *data, size_t length)
222 {
223     size_t count;
224 
225     count = cli_writen(ofd, data, length);
226     if (count != length) {
227         return CL_EWRITE;
228     }
229     return CL_SUCCESS;
230 }
231 
/*
 * Build the decoding structures for a prefix code described by per-symbol
 * code lengths.
 *
 * decode_data  decoder state; left[]/right[] receive the tree used for codes
 *              longer than tablebits, and status is set to CL_EUNPACK on
 *              every failure path.
 * nchar        number of entries of bitlen[] to process.
 * bitlen       code length of each symbol; 0 means the symbol is unused and
 *              values of 17 or more are rejected.
 * tablebits    number of leading code bits resolved directly through table[].
 * table        direct lookup table to fill.
 * tablesize    number of entries in table[], used for bounds checks.
 *
 * Returns CL_SUCCESS, or CL_EUNPACK when the lengths are inconsistent or a
 * bounds check fails.
 */
static cl_error_t make_table(arj_decode_t *decode_data, int nchar, unsigned char *bitlen, int tablebits,
                             unsigned short *table, int tablesize)
{
    unsigned short count[17], weight[17], start[18], *p;
    unsigned int i, k, len, ch, jutbits, avail, nextcode, mask;

    /* Tally how many symbols use each code length. */
    for (i = 1; i <= 16; i++) {
        count[i] = 0;
    }
    for (i = 0; (int)i < nchar; i++) {
        if (bitlen[i] >= 17) {
            cli_dbgmsg("UNARJ: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return CL_EUNPACK;
        }
        count[bitlen[i]]++;
    }

    /* start[len] is the first code of that length, left-aligned in 16 bits. */
    start[1] = 0;
    for (i = 1; i <= 16; i++) {
        start[i + 1] = start[i] + (count[i] << (16 - i));
    }
    /* (unsigned short)(1 << 16) is 0: the 16-bit running total must wrap to exactly zero. */
    if (start[17] != (unsigned short)(1 << 16)) {
        decode_data->status = CL_EUNPACK;
        return CL_EUNPACK;
    }

    /* Number of code bits that do not fit in the direct table index. */
    jutbits = 16 - tablebits;
    if (tablebits >= 17) {
        cli_dbgmsg("UNARJ: bounds exceeded\n");
        decode_data->status = CL_EUNPACK;
        return CL_EUNPACK;
    }
    /*
     * For lengths served by the direct table, turn start[] into a table index
     * and weight[] into the number of table slots one code occupies. Longer
     * lengths keep the 16-bit left-aligned representation.
     */
    for (i = 1; (int)i <= tablebits; i++) {
        start[i] >>= jutbits;
        weight[i] = 1 << (tablebits - i);
    }
    while (i <= 16) {
        weight[i] = 1 << (16 - i);
        i++;
    }

    /* Zero the table slots reached only through long codes; 0 there means "no tree node yet". */
    i = start[tablebits + 1] >> jutbits;
    if (i != (unsigned short)(1 << 16)) {
        k = 1 << tablebits;
        while (i != k) {
            if (i >= (unsigned int)tablesize) {
                cli_dbgmsg("UNARJ: bounds exceeded\n");
                decode_data->status = CL_EUNPACK;
                return CL_EUNPACK;
            }
            table[i++] = 0;
        }
    }

    /* Tree node numbers are handed out starting right after the symbol values. */
    avail = nchar;
    /* Bit of the left-aligned code examined at the first step below the table prefix. */
    mask  = 1 << (15 - tablebits);
    for (ch = 0; (int)ch < nchar; ch++) {
        if ((len = bitlen[ch]) == 0) {
            continue;
        }
        if (len >= 17) {
            cli_dbgmsg("UNARJ: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return CL_EUNPACK;
        }
        k        = start[len];
        nextcode = k + weight[len];
        if ((int)len <= tablebits) {
            /* Short code: every table slot sharing this prefix maps to ch. */
            if (nextcode > (unsigned int)tablesize) {
                decode_data->status = CL_EUNPACK;
                return CL_EUNPACK;
            }
            for (i = start[len]; i < nextcode; i++) {
                table[i] = ch;
            }
        } else {
            /* Long code: walk (and extend as needed) the tree hanging off the table slot. */
            p = &table[k >> jutbits];
            i = len - tablebits;
            while (i != 0) {
                if (*p == 0) {
                    if (avail >= (2 * NC - 1)) {
                        cli_dbgmsg("UNARJ: bounds exceeded\n");
                        decode_data->status = CL_EUNPACK;
                        return CL_EUNPACK;
                    }
                    decode_data->right[avail] = decode_data->left[avail] = 0;
                    *p                                                   = avail++;
                }
                if (*p >= (2 * NC - 1)) {
                    cli_dbgmsg("UNARJ: bounds exceeded\n");
                    decode_data->status = CL_EUNPACK;
                    return CL_EUNPACK;
                }
                if (k & mask) {
                    p = &decode_data->right[*p];
                } else {
                    p = &decode_data->left[*p];
                }
                k <<= 1;
                i--;
            }
            *p = ch;
        }
        /* The next symbol of the same length gets the following code. */
        start[len] = nextcode;
    }
    return CL_SUCCESS;
}
340 
read_pt_len(arj_decode_t * decode_data,int nn,int nbit,int i_special)341 static cl_error_t read_pt_len(arj_decode_t *decode_data, int nn, int nbit, int i_special)
342 {
343     int i, n;
344     short c;
345     unsigned short mask;
346 
347     n = arj_getbits(decode_data, nbit);
348     if (n == 0) {
349         if (nn > NPT) {
350             cli_dbgmsg("UNARJ: bounds exceeded\n");
351             decode_data->status = CL_EUNPACK;
352             return CL_EUNPACK;
353         }
354         c = arj_getbits(decode_data, nbit);
355         for (i = 0; i < nn; i++) {
356             decode_data->pt_len[i] = 0;
357         }
358         for (i = 0; i < 256; i++) {
359             decode_data->pt_table[i] = c;
360         }
361     } else {
362         i = 0;
363         while ((i < n) && (i < NPT)) {
364             c = decode_data->bit_buf >> 13;
365             if (c == 7) {
366                 mask = 1 << 12;
367                 while (mask & decode_data->bit_buf) {
368                     mask >>= 1;
369                     c++;
370                 }
371             }
372             fill_buf(decode_data, (c < 7) ? 3 : (int)(c - 3));
373             if (decode_data->status != CL_SUCCESS) {
374                 return decode_data->status;
375             }
376             decode_data->pt_len[i++] = (unsigned char)c;
377             if (i == i_special) {
378                 c = arj_getbits(decode_data, 2);
379                 if (decode_data->status != CL_SUCCESS) {
380                     return decode_data->status;
381                 }
382                 while ((--c >= 0) && (i < NPT)) {
383                     decode_data->pt_len[i++] = 0;
384                 }
385             }
386         }
387         while ((i < nn) && (i < NPT)) {
388             decode_data->pt_len[i++] = 0;
389         }
390         if (make_table(decode_data, nn, decode_data->pt_len, 8, decode_data->pt_table, PTABLESIZE) != CL_SUCCESS) {
391             return CL_EUNPACK;
392         }
393     }
394     return CL_SUCCESS;
395 }
396 
/*
 * Read the code lengths of the literal/length alphabet into
 * decode_data->c_len and build decode_data->c_table from them.
 *
 * The lengths are themselves coded with the table previously stored in
 * pt_table/pt_len. A leading 9-bit (CBIT) count of 0 means every lookup
 * yields one single symbol, which is read next.
 *
 * Returns CL_SUCCESS on success. On failure decode_data->status is set
 * (CL_EFORMAT or CL_EUNPACK) and an error code is returned.
 */
static cl_error_t read_c_len(arj_decode_t *decode_data)
{
    short i, c, n;
    unsigned short mask;

    n = arj_getbits(decode_data, CBIT);
    if (decode_data->status != CL_SUCCESS) {
        return decode_data->status;
    }
    if (n == 0) {
        c = arj_getbits(decode_data, CBIT);
        if (decode_data->status != CL_SUCCESS) {
            return decode_data->status;
        }
        for (i = 0; i < NC; i++) {
            decode_data->c_len[i] = 0;
        }
        for (i = 0; i < CTABLESIZE; i++) {
            decode_data->c_table[i] = c;
        }
    } else {
        i = 0;
        while (i < n) {
            /* Decode one symbol of the length code: direct lookup on the top 8 bits first. */
            c = decode_data->pt_table[decode_data->bit_buf >> 8];
            if (c >= NT) {
                /* Values >= NT are tree nodes: follow left/right using the next bits. */
                mask = 1 << 7;
                do {
                    if (c >= (2 * NC - 1)) {
                        cli_dbgmsg("ERROR: bounds exceeded\n");
                        decode_data->status = CL_EFORMAT;
                        return CL_EFORMAT;
                    }
                    if (decode_data->bit_buf & mask) {
                        c = decode_data->right[c];
                    } else {
                        c = decode_data->left[c];
                    }
                    mask >>= 1;
                } while (c >= NT);
            }
            /* c indexes pt_len below, so it must stay within the entries written by read_pt_len(). */
            if (c >= 19) {
                cli_dbgmsg("UNARJ: bounds exceeded\n");
                decode_data->status = CL_EUNPACK;
                return CL_EUNPACK;
            }
            fill_buf(decode_data, (int)(decode_data->pt_len[c]));
            if (decode_data->status != CL_SUCCESS) {
                return decode_data->status;
            }
            if (c <= 2) {
                /* Symbols 0..2 encode runs of zero lengths: 1, 3 + 4-bit value, or 20 + 9-bit value. */
                if (c == 0) {
                    c = 1;
                } else if (c == 1) {
                    c = arj_getbits(decode_data, 4) + 3;
                } else {
                    c = arj_getbits(decode_data, CBIT) + 20;
                }
                if (decode_data->status != CL_SUCCESS) {
                    return decode_data->status;
                }
                while (--c >= 0) {
                    if (i >= NC) {
                        cli_dbgmsg("ERROR: bounds exceeded\n");
                        decode_data->status = CL_EFORMAT;
                        return CL_EFORMAT;
                    }
                    decode_data->c_len[i++] = 0;
                }
            } else {
                /* Any other symbol is a code length offset by 2. */
                if (i >= NC) {
                    cli_dbgmsg("ERROR: bounds exceeded\n");
                    decode_data->status = CL_EFORMAT;
                    return CL_EFORMAT;
                }
                decode_data->c_len[i++] = (unsigned char)(c - 2);
            }
        }
        /* Symbols not covered by the transmitted count are unused. */
        while (i < NC) {
            decode_data->c_len[i++] = 0;
        }
        if (make_table(decode_data, NC, decode_data->c_len, 12, decode_data->c_table, CTABLESIZE) != CL_SUCCESS) {
            return CL_EUNPACK;
        }
    }
    return CL_SUCCESS;
}
483 
/*
 * Decode the next literal/length symbol.
 *
 * When the current block is exhausted (blocksize == 0) a new 16-bit block
 * size and three fresh tables are read first. If any of them cannot be read,
 * decoding stops at once instead of continuing against partially built
 * tables: decode_data->status carries the error and 0 is returned.
 *
 * Returns the symbol (always < NC), or 0 with decode_data->status set on
 * error. Callers must check decode_data->status.
 */
static uint16_t decode_c(arj_decode_t *decode_data)
{
    uint16_t j, mask;

    if (decode_data->blocksize == 0) {
        decode_data->blocksize = arj_getbits(decode_data, 16);
        if (read_pt_len(decode_data, NT, TBIT, 3) != CL_SUCCESS ||
            read_c_len(decode_data) != CL_SUCCESS ||
            read_pt_len(decode_data, NT, PBIT, -1) != CL_SUCCESS ||
            decode_data->status != CL_SUCCESS) {
            /* Never report a failure with a clean status. */
            if (decode_data->status == CL_SUCCESS) {
                decode_data->status = CL_EUNPACK;
            }
            return 0;
        }
    }
    decode_data->blocksize--;

    /* Direct lookup on the top 12 bits; values >= NC are tree nodes. */
    j = decode_data->c_table[decode_data->bit_buf >> 4];
    if (j >= NC) {
        mask = 1 << 3;
        do {
            if (j >= (2 * NC - 1)) {
                cli_dbgmsg("ERROR: bounds exceeded\n");
                decode_data->status = CL_EUNPACK;
                return 0;
            }
            if (decode_data->bit_buf & mask) {
                j = decode_data->right[j];
            } else {
                j = decode_data->left[j];
            }
            mask >>= 1;
        } while (j >= NC);
    }
    /* Consume the bits of the code just decoded. */
    fill_buf(decode_data, (int)(decode_data->c_len[j]));
    return j;
}
515 
/*
 * Decode a match distance.
 *
 * The top 8 bits of the look-ahead select an entry of pt_table; entries
 * >= NP are tree nodes resolved through left[]/right[] one bit at a time.
 * A resulting symbol s > 0 is expanded to (1 << (s - 1)) plus (s - 1) extra
 * bits read from the stream.
 *
 * Returns the distance value, or 0 with decode_data->status set to
 * CL_EUNPACK when a tree node index is out of range.
 */
static uint16_t decode_p(arj_decode_t *decode_data)
{
    unsigned short sym = decode_data->pt_table[decode_data->bit_buf >> 8];
    unsigned short probe;
    unsigned short extra;

    for (probe = 1 << 7; sym >= NP; probe >>= 1) {
        if (sym >= (2 * NC - 1)) {
            cli_dbgmsg("ERROR: bounds exceeded\n");
            decode_data->status = CL_EUNPACK;
            return 0;
        }
        sym = (decode_data->bit_buf & probe) ? decode_data->right[sym] : decode_data->left[sym];
    }

    /* Consume the bits of the code just decoded. */
    fill_buf(decode_data, (int)(decode_data->pt_len[sym]));
    if (sym == 0) {
        return sym;
    }

    sym--;
    extra = arj_getbits(decode_data, (int)sym);
    sym   = (1 << sym) + extra;
    return sym;
}
544 
decode(arj_metadata_t * metadata)545 static cl_error_t decode(arj_metadata_t *metadata)
546 {
547     cl_error_t ret;
548 
549     arj_decode_t decode_data;
550     uint32_t count = 0, out_ptr = 0;
551     int16_t chr, i, j;
552 
553     memset(&decode_data, 0, sizeof(decode_data));
554     decode_data.text = (unsigned char *)cli_calloc(DDICSIZ, 1);
555     if (!decode_data.text) {
556         return CL_EMEM;
557     }
558     decode_data.map       = metadata->map;
559     decode_data.offset    = metadata->offset;
560     decode_data.comp_size = metadata->comp_size;
561     ret                   = decode_start(&decode_data);
562     if (ret != CL_SUCCESS) {
563         free(decode_data.text);
564         metadata->offset = decode_data.offset;
565         return ret;
566     }
567     decode_data.status = CL_SUCCESS;
568 
569     while (count < metadata->orig_size) {
570         if ((chr = decode_c(&decode_data)) <= UCHAR_MAX) {
571             decode_data.text[out_ptr] = (unsigned char)chr;
572             count++;
573             if (++out_ptr >= DDICSIZ) {
574                 out_ptr = 0;
575                 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
576                     free(decode_data.text);
577                     metadata->offset = decode_data.offset;
578                     return CL_EWRITE;
579                 }
580             }
581         } else {
582             j = chr - (UCHAR_MAX + 1 - THRESHOLD);
583             count += j;
584             i = decode_p(&decode_data);
585             if ((i = out_ptr - i - 1) < 0) {
586                 i += DDICSIZ;
587             }
588             if ((i >= DDICSIZ) || (i < 0)) {
589                 cli_dbgmsg("UNARJ: bounds exceeded - probably a corrupted file.\n");
590                 break;
591             }
592             if (out_ptr > (uint32_t)i && out_ptr < DDICSIZ - MAXMATCH - 1) {
593                 while ((--j >= 0) && (i < DDICSIZ) && (out_ptr < DDICSIZ)) {
594                     decode_data.text[out_ptr++] = decode_data.text[i++];
595                 }
596             } else {
597                 while (--j >= 0) {
598                     decode_data.text[out_ptr] = decode_data.text[i];
599                     if (++out_ptr >= DDICSIZ) {
600                         out_ptr = 0;
601                         if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
602                             free(decode_data.text);
603                             metadata->offset = decode_data.offset;
604                             return CL_EWRITE;
605                         }
606                     }
607                     if (++i >= DDICSIZ) {
608                         i = 0;
609                     }
610                 }
611             }
612         }
613         if (decode_data.status != CL_SUCCESS) {
614             free(decode_data.text);
615             metadata->offset = decode_data.offset;
616             return decode_data.status;
617         }
618     }
619     if (out_ptr != 0) {
620         write_text(metadata->ofd, decode_data.text, out_ptr);
621     }
622 
623     free(decode_data.text);
624     metadata->offset = decode_data.offset;
625     return CL_SUCCESS;
626 }
627 
/*
 * Bit-access helpers for decode_f(), decode_len() and decode_ptr().
 * They keep a second bit buffer (getbuf, with getlen valid bits) on top of
 * bit_buf. All of them expand to statements that use `dd` as a pointer to
 * the decoder state; ARJ_BPUL declares locals named `i` and `j`, so the
 * argument passed as `l` must not be an expression using those names.
 * The result of fill_buf() is ignored here; callers check dd->status.
 */

/* Refill getbuf from bit_buf and mark it as holding CODE_BIT valid bits. */
#define ARJ_BFIL(dd)                             \
    {                                            \
        dd->getbuf |= dd->bit_buf >> dd->getlen; \
        fill_buf(dd, CODE_BIT - dd->getlen);     \
        dd->getlen = CODE_BIT;                   \
    }
/* Read one bit (the top bit of getbuf) into c, refilling first if getbuf is empty. */
#define ARJ_GETBIT(dd, c)                 \
    {                                     \
        if (dd->getlen <= 0) ARJ_BFIL(dd) \
        c = (dd->getbuf & 0x8000) != 0;   \
        dd->getbuf *= 2;                  \
        dd->getlen--;                     \
    }
/* Discard l bits from the top of getbuf. */
#define ARJ_BPUL(dd, l)           \
    do {                          \
        int i;                    \
        int j = l;                \
        for (i = 0; i < j; i++) { \
            dd->getbuf *= 2;      \
        }                         \
        dd->getlen -= l;          \
    } while (0)
/* Read l bits from the top of getbuf into c, refilling first if fewer than l are available. */
#define ARJ_GETBITS(dd, c, l)                       \
    {                                               \
        if (dd->getlen < l) ARJ_BFIL(dd)            \
        c = (uint16_t)dd->getbuf >> (CODE_BIT - l); \
        ARJ_BPUL(dd, l);                            \
    }
656 
/*
 * Decode a match distance for decode_f().
 *
 * Up to (STOPP - STRTP) prefix bits are read; each 1 bit widens the value
 * field by one bit, starting from STRTP bits, and adds the size of the range
 * just skipped to the base. A 0 bit ends the prefix. The value field is then
 * read and added to the base.
 */
static uint16_t decode_ptr(arj_decode_t *decode_data)
{
    uint16_t bits;
    uint16_t nbits = STRTP;
    uint16_t base  = 0;
    uint16_t step  = 1 << STRTP;

    while (nbits < STOPP) {
        ARJ_GETBIT(decode_data, bits);
        if (bits == 0) {
            break;
        }
        base += step;
        step <<= 1;
        nbits++;
    }
    if (nbits != 0) {
        ARJ_GETBITS(decode_data, bits, nbits);
    }
    bits += base;
    return bits;
}
677 
/*
 * Decode a length code for decode_f().
 *
 * Up to (STOPL - STRTL) prefix bits are read; each 1 bit widens the value
 * field by one bit, starting from STRTL bits, and adds the size of the range
 * just skipped to the base. A 0 bit ends the prefix. When the value field is
 * not empty it is read and added to the base; otherwise the result is the
 * terminating 0 bit itself, which decode_f() treats as "literal follows".
 */
static uint16_t decode_len(arj_decode_t *decode_data)
{
    uint16_t bits;
    uint16_t nbits = STRTL;
    uint16_t base  = 0;
    uint16_t step  = 1 << STRTL;

    while (nbits < STOPL) {
        ARJ_GETBIT(decode_data, bits);
        if (bits == 0) {
            break;
        }
        base += step;
        step <<= 1;
        nbits++;
    }
    if (nbits != 0) {
        ARJ_GETBITS(decode_data, bits, nbits);
    }
    bits += base;
    return bits;
}
698 
decode_f(arj_metadata_t * metadata)699 static cl_error_t decode_f(arj_metadata_t *metadata)
700 {
701     cl_error_t ret;
702 
703     arj_decode_t decode_data, *dd;
704     uint32_t count = 0, out_ptr = 0;
705     int16_t chr, i, j, pos;
706 
707     dd = &decode_data;
708     memset(&decode_data, 0, sizeof(decode_data));
709     decode_data.text = (unsigned char *)cli_calloc(DDICSIZ, 1);
710     if (!decode_data.text) {
711         return CL_EMEM;
712     }
713     decode_data.map       = metadata->map;
714     decode_data.offset    = metadata->offset;
715     decode_data.comp_size = metadata->comp_size;
716     ret                   = init_getbits(&decode_data);
717     if (ret != CL_SUCCESS) {
718         free(decode_data.text);
719         metadata->offset = decode_data.offset;
720         return ret;
721     }
722     decode_data.getlen = decode_data.getbuf = 0;
723     decode_data.status                      = CL_SUCCESS;
724 
725     while (count < metadata->orig_size) {
726         chr = decode_len(&decode_data);
727         if (decode_data.status != CL_SUCCESS) {
728             free(decode_data.text);
729             metadata->offset = decode_data.offset;
730             return decode_data.status;
731         }
732         if (chr == 0) {
733             ARJ_GETBITS(dd, chr, CHAR_BIT);
734             if (decode_data.status != CL_SUCCESS) {
735                 free(decode_data.text);
736                 metadata->offset = decode_data.offset;
737                 return decode_data.status;
738             }
739             decode_data.text[out_ptr] = (unsigned char)chr;
740             count++;
741             if (++out_ptr >= DDICSIZ) {
742                 out_ptr = 0;
743                 if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
744                     free(decode_data.text);
745                     metadata->offset = decode_data.offset;
746                     return CL_EWRITE;
747                 }
748             }
749         } else {
750             j = chr - 1 + THRESHOLD;
751             count += j;
752             pos = decode_ptr(&decode_data);
753             if (decode_data.status != CL_SUCCESS) {
754                 free(decode_data.text);
755                 metadata->offset = decode_data.offset;
756                 return decode_data.status;
757             }
758             if ((i = out_ptr - pos - 1) < 0) {
759                 i += DDICSIZ;
760             }
761             if ((i >= DDICSIZ) || (i < 0)) {
762                 cli_dbgmsg("UNARJ: bounds exceeded - probably a corrupted file.\n");
763                 break;
764             }
765             while (j-- > 0) {
766                 decode_data.text[out_ptr] = decode_data.text[i];
767                 if (++out_ptr >= DDICSIZ) {
768                     out_ptr = 0;
769                     if (write_text(metadata->ofd, decode_data.text, DDICSIZ) != CL_SUCCESS) {
770                         free(decode_data.text);
771                         metadata->offset = decode_data.offset;
772                         return CL_EWRITE;
773                     }
774                 }
775                 if (++i >= DDICSIZ) {
776                     i = 0;
777                 }
778             }
779         }
780     }
781     if (out_ptr != 0) {
782         write_text(metadata->ofd, decode_data.text, out_ptr);
783     }
784 
785     free(decode_data.text);
786     metadata->offset = decode_data.offset;
787     return CL_SUCCESS;
788 }
789 
arj_unstore(arj_metadata_t * metadata,int ofd,uint32_t len)790 static cl_error_t arj_unstore(arj_metadata_t *metadata, int ofd, uint32_t len)
791 {
792     const unsigned char *data;
793     uint32_t rem;
794     unsigned int todo;
795     size_t count;
796 
797     cli_dbgmsg("in arj_unstore\n");
798     rem = len;
799 
800     while (rem > 0) {
801         todo = (unsigned int)MIN(8192, rem);
802         data = fmap_need_off_once_len(metadata->map, metadata->offset, todo, &count);
803         if (!data || !count) {
804             /* Truncated file, not enough bytes available */
805             return CL_EFORMAT;
806         }
807         metadata->offset += count;
808         if (cli_writen(ofd, data, count) != count) {
809             /* File writing problem */
810             return CL_EWRITE;
811         }
812         rem -= count;
813     }
814     return CL_SUCCESS;
815 }
816 
/*
 * Check for the two-byte ARJ header marker (0x60 0xea) at metadata->offset.
 *
 * When the two bytes can be read, metadata->offset is advanced past them
 * whether or not they match. Returns TRUE on a match, FALSE otherwise.
 */
static int is_arj_archive(arj_metadata_t *metadata)
{
    const char header_id[2] = {0x60, 0xea};
    const char *mark        = fmap_need_off_once(metadata->map, metadata->offset, 2);

    if (!mark) {
        return FALSE;
    }
    metadata->offset += 2;

    if (memcmp(mark, header_id, 2) != 0) {
        cli_dbgmsg("Not an ARJ archive\n");
        return FALSE;
    }
    return TRUE;
}
832 
arj_read_main_header(arj_metadata_t * metadata)833 static int arj_read_main_header(arj_metadata_t *metadata)
834 {
835     uint16_t header_size, count;
836     arj_main_hdr_t main_hdr;
837     const char *filename = NULL;
838     const char *comment  = NULL;
839     struct text_norm_state fnstate, comstate;
840     unsigned char *fnnorm  = NULL;
841     unsigned char *comnorm = NULL;
842     uint32_t ret           = TRUE;
843 
844     size_t filename_max_len = 0;
845     size_t filename_len     = 0;
846     size_t comment_max_len  = 0;
847     size_t comment_len      = 0;
848     size_t orig_offset      = metadata->offset;
849 
850     if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
851         return FALSE;
852 
853     metadata->offset += 2;
854     header_size = le16_to_host(header_size);
855     cli_dbgmsg("Header Size: %d\n", header_size);
856     if (header_size == 0) {
857         /* End of archive */
858         ret = FALSE;
859         goto done;
860     }
861     if (header_size > HEADERSIZE_MAX) {
862         cli_dbgmsg("arj_read_header: invalid header_size: %u\n ", header_size);
863         ret = FALSE;
864         goto done;
865     }
866     if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
867         cli_dbgmsg("arj_read_header: invalid header_size: %u, exceeds length of file.\n", header_size);
868         ret = FALSE;
869         goto done;
870     }
871     if (fmap_readn(metadata->map, &main_hdr, metadata->offset, 30) != 30) {
872         ret = FALSE;
873         goto done;
874     }
875     metadata->offset += 30;
876 
877     cli_dbgmsg("ARJ Main File Header\n");
878     cli_dbgmsg("First Header Size: %d\n", main_hdr.first_hdr_size);
879     cli_dbgmsg("Version: %d\n", main_hdr.version);
880     cli_dbgmsg("Min version: %d\n", main_hdr.min_version);
881     cli_dbgmsg("Host OS: %d\n", main_hdr.host_os);
882     cli_dbgmsg("Flags: 0x%x\n", main_hdr.flags);
883     cli_dbgmsg("Security version: %d\n", main_hdr.security_version);
884     cli_dbgmsg("File type: %d\n", main_hdr.file_type);
885 
886     if (main_hdr.first_hdr_size < 30) {
887         cli_dbgmsg("Format error. First Header Size < 30\n");
888         ret = FALSE;
889         goto done;
890     }
891     if (main_hdr.first_hdr_size > 30) {
892         metadata->offset += main_hdr.first_hdr_size - 30;
893     }
894 
895     filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
896     if (filename_max_len > header_size) {
897         cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
898         ret = FALSE;
899         goto done;
900     }
901     if (filename_max_len > 0) {
902         fnnorm   = cli_calloc(sizeof(unsigned char), filename_max_len + 1);
903         filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
904         if (!filename || !fnnorm) {
905             cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
906             ret = FALSE;
907             goto done;
908         }
909         filename_len = CLI_STRNLEN(filename, filename_max_len);
910     }
911     metadata->offset += filename_len + 1;
912 
913     comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
914     if (comment_max_len > header_size) {
915         cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
916         ret = FALSE;
917         goto done;
918     }
919     if (comment_max_len > 0) {
920         comnorm = cli_calloc(sizeof(unsigned char), comment_max_len + 1);
921         comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
922         if (!comment || !comnorm) {
923             cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
924             ret = FALSE;
925             goto done;
926         }
927         comment_len = CLI_STRNLEN(comment, comment_max_len);
928     }
929     metadata->offset += comment_len + 1;
930 
931     text_normalize_init(&fnstate, fnnorm, filename_max_len);
932     text_normalize_init(&comstate, comnorm, comment_max_len);
933 
934     text_normalize_buffer(&fnstate, (const unsigned char *)filename, filename_len);
935     text_normalize_buffer(&comstate, (const unsigned char *)comment, comment_len);
936 
937     cli_dbgmsg("Filename: %s\n", fnnorm);
938     cli_dbgmsg("Comment: %s\n", comnorm);
939 
940     metadata->offset += 4; /* crc */
941     /* Skip past any extended header data */
942     for (;;) {
943         const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
944         if (!countp) {
945             ret = FALSE;
946             goto done;
947         }
948         count = cli_readint16(countp);
949         metadata->offset += 2;
950         cli_dbgmsg("Extended header size: %d\n", count);
951         if (count == 0) {
952             break;
953         }
954         /* Skip extended header + 4byte CRC */
955         metadata->offset += count + 4;
956     }
957 
958 done:
959 
960     if (fnnorm) {
961         free(fnnorm);
962         fnnorm = NULL;
963     }
964 
965     if (comnorm) {
966         free(comnorm);
967         comnorm = NULL;
968     }
969     return ret;
970 }
971 
arj_read_file_header(arj_metadata_t * metadata)972 static cl_error_t arj_read_file_header(arj_metadata_t *metadata)
973 {
974     uint16_t header_size, count;
975     const char *filename, *comment;
976     arj_file_hdr_t file_hdr;
977     struct text_norm_state fnstate, comstate;
978     unsigned char *fnnorm  = NULL;
979     unsigned char *comnorm = NULL;
980     cl_error_t ret         = CL_SUCCESS;
981 
982     size_t filename_max_len = 0;
983     size_t filename_len     = 0;
984     size_t comment_max_len  = 0;
985     size_t comment_len      = 0;
986     size_t orig_offset      = metadata->offset;
987 
988     if (fmap_readn(metadata->map, &header_size, metadata->offset, 2) != 2)
989         return CL_EFORMAT;
990     header_size = le16_to_host(header_size);
991     metadata->offset += 2;
992 
993     cli_dbgmsg("Header Size: %d\n", header_size);
994     if (header_size == 0) {
995         /* End of archive */
996         ret = CL_BREAK;
997         goto done;
998     }
999     if (header_size > HEADERSIZE_MAX) {
1000         cli_dbgmsg("arj_read_file_header: invalid header_size: %u\n ", header_size);
1001         ret = CL_EFORMAT;
1002         goto done;
1003     }
1004     if ((header_size + sizeof(header_size)) > (metadata->map->len - metadata->offset)) {
1005         cli_dbgmsg("arj_read_file_header: invalid header_size: %u, exceeds length of file.\n", header_size);
1006         ret = CL_EFORMAT;
1007         goto done;
1008     }
1009     if (fmap_readn(metadata->map, &file_hdr, metadata->offset, 30) != 30) {
1010         ret = CL_EFORMAT;
1011         goto done;
1012     }
1013     metadata->offset += 30;
1014     file_hdr.comp_size = le32_to_host(file_hdr.comp_size);
1015     file_hdr.orig_size = le32_to_host(file_hdr.orig_size);
1016 
1017     cli_dbgmsg("ARJ File Header\n");
1018     cli_dbgmsg("First Header Size: %d\n", file_hdr.first_hdr_size);
1019     cli_dbgmsg("Version: %d\n", file_hdr.version);
1020     cli_dbgmsg("Min version: %d\n", file_hdr.min_version);
1021     cli_dbgmsg("Host OS: %d\n", file_hdr.host_os);
1022     cli_dbgmsg("Flags: 0x%x\n", file_hdr.flags);
1023     cli_dbgmsg("Method: %d\n", file_hdr.method);
1024     cli_dbgmsg("File type: %d\n", file_hdr.file_type);
1025     cli_dbgmsg("File type: %d\n", file_hdr.password_mod);
1026     cli_dbgmsg("Compressed size: %u\n", file_hdr.comp_size);
1027     cli_dbgmsg("Original size: %u\n", file_hdr.orig_size);
1028 
1029     if (file_hdr.first_hdr_size < 30) {
1030         cli_dbgmsg("Format error. First Header Size < 30\n");
1031         ret = CL_EFORMAT;
1032         goto done;
1033     }
1034 
1035     /* Note: this skips past any extended file start position data (multi-volume) */
1036     if (file_hdr.first_hdr_size > 30) {
1037         metadata->offset += file_hdr.first_hdr_size - 30;
1038     }
1039 
1040     filename_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
1041     if (filename_max_len > header_size) {
1042         cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
1043         ret = CL_EFORMAT;
1044         goto done;
1045     }
1046     if (filename_max_len > 0) {
1047         fnnorm = cli_calloc(sizeof(unsigned char), filename_max_len + 1);
1048         if (!fnnorm) {
1049             cli_dbgmsg("UNARJ: Unable to allocate memory for filename\n");
1050             ret = CL_EMEM;
1051             goto done;
1052         }
1053         filename = fmap_need_offstr(metadata->map, metadata->offset, filename_max_len + 1);
1054         if (!filename) {
1055             cli_dbgmsg("UNARJ: Filename is out of file\n");
1056             ret = CL_EFORMAT;
1057             goto done;
1058         }
1059         filename_len = CLI_STRNLEN(filename, filename_max_len);
1060     }
1061     metadata->offset += filename_len + 1;
1062 
1063     comment_max_len = (header_size + sizeof(header_size)) - (metadata->offset - orig_offset);
1064     if (comment_max_len > header_size) {
1065         cli_dbgmsg("UNARJ: Format error. First Header Size invalid\n");
1066         ret = CL_EFORMAT;
1067         goto done;
1068     }
1069     if (comment_max_len > 0) {
1070         comnorm = cli_calloc(sizeof(unsigned char), comment_max_len + 1);
1071         if (!comnorm) {
1072             cli_dbgmsg("UNARJ: Unable to allocate memory for comment\n");
1073             ret = CL_EMEM;
1074             goto done;
1075         }
1076         comment = fmap_need_offstr(metadata->map, metadata->offset, comment_max_len + 1);
1077         if (!comment) {
1078             cli_dbgmsg("UNARJ: comment is out of file\n");
1079             ret = CL_EFORMAT;
1080             goto done;
1081         }
1082         comment_len += CLI_STRNLEN(comment, comment_max_len);
1083     }
1084     metadata->offset += comment_len + 1;
1085 
1086     text_normalize_init(&fnstate, fnnorm, filename_max_len);
1087     text_normalize_init(&comstate, comnorm, comment_max_len);
1088 
1089     text_normalize_buffer(&fnstate, (const unsigned char *)filename, filename_len);
1090     text_normalize_buffer(&comstate, (const unsigned char *)comment, comment_len);
1091 
1092     cli_dbgmsg("Filename: %s\n", fnnorm);
1093     cli_dbgmsg("Comment: %s\n", comnorm);
1094     metadata->filename = CLI_STRNDUP(filename, filename_len);
1095 
1096     /* Skip CRC */
1097     metadata->offset += 4;
1098 
1099     /* Skip past any extended header data */
1100     for (;;) {
1101         const uint16_t *countp = fmap_need_off_once(metadata->map, metadata->offset, 2);
1102         if (!countp) {
1103             if (metadata->filename)
1104                 free(metadata->filename);
1105             metadata->filename = NULL;
1106             ret                = CL_EFORMAT;
1107             goto done;
1108         }
1109         count = cli_readint16(countp);
1110         metadata->offset += 2;
1111         cli_dbgmsg("Extended header size: %d\n", count);
1112         if (count == 0) {
1113             break;
1114         }
1115         /* Skip extended header + 4byte CRC */
1116         metadata->offset += count + 4;
1117     }
1118     metadata->comp_size = file_hdr.comp_size;
1119     metadata->orig_size = file_hdr.orig_size;
1120     metadata->method    = file_hdr.method;
1121     metadata->encrypted = ((file_hdr.flags & GARBLE_FLAG) != 0) ? TRUE : FALSE;
1122     metadata->ofd       = -1;
1123     if (!metadata->filename) {
1124         ret = CL_EMEM;
1125         goto done;
1126     }
1127 
1128 done:
1129 
1130     if (fnnorm) {
1131         free(fnnorm);
1132         fnnorm = NULL;
1133     }
1134 
1135     if (comnorm) {
1136         free(comnorm);
1137         comnorm = NULL;
1138     }
1139     return ret;
1140 }
1141 
cli_unarj_open(fmap_t * map,const char * dirname,arj_metadata_t * metadata)1142 cl_error_t cli_unarj_open(fmap_t *map, const char *dirname, arj_metadata_t *metadata)
1143 {
1144     UNUSEDPARAM(dirname);
1145     cli_dbgmsg("in cli_unarj_open\n");
1146     metadata->map    = map;
1147     metadata->offset = 0;
1148     if (!is_arj_archive(metadata)) {
1149         cli_dbgmsg("Not in ARJ format\n");
1150         return CL_EFORMAT;
1151     }
1152     if (!arj_read_main_header(metadata)) {
1153         cli_dbgmsg("Failed to read main header\n");
1154         return CL_EFORMAT;
1155     }
1156     return CL_SUCCESS;
1157 }
1158 
cli_unarj_prepare_file(const char * dirname,arj_metadata_t * metadata)1159 cl_error_t cli_unarj_prepare_file(const char *dirname, arj_metadata_t *metadata)
1160 {
1161     cli_dbgmsg("in cli_unarj_prepare_file\n");
1162     if (!metadata || !dirname) {
1163         return CL_ENULLARG;
1164     }
1165     /* Each file is preceded by the ARJ file marker */
1166     if (!is_arj_archive(metadata)) {
1167         cli_dbgmsg("Not in ARJ format\n");
1168         return CL_EFORMAT;
1169     }
1170     return arj_read_file_header(metadata);
1171 }
1172 
cli_unarj_extract_file(const char * dirname,arj_metadata_t * metadata)1173 cl_error_t cli_unarj_extract_file(const char *dirname, arj_metadata_t *metadata)
1174 {
1175     cl_error_t ret = CL_SUCCESS;
1176     char filename[1024];
1177 
1178     cli_dbgmsg("in cli_unarj_extract_file\n");
1179     if (!metadata || !dirname) {
1180         return CL_ENULLARG;
1181     }
1182 
1183     if (metadata->encrypted) {
1184         cli_dbgmsg("PASSWORDed file (skipping)\n");
1185         metadata->offset += metadata->comp_size;
1186         cli_dbgmsg("Target offset: %lu\n", (unsigned long int)metadata->offset);
1187         return CL_SUCCESS;
1188     }
1189 
1190     snprintf(filename, 1024, "%s" PATHSEP "file.uar", dirname);
1191     cli_dbgmsg("Filename: %s\n", filename);
1192     metadata->ofd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0600);
1193     if (metadata->ofd < 0) {
1194         return CL_EOPEN;
1195     }
1196     switch (metadata->method) {
1197         case 0:
1198             ret = arj_unstore(metadata, metadata->ofd, metadata->comp_size);
1199             break;
1200         case 1:
1201         case 2:
1202         case 3:
1203             ret = decode(metadata);
1204             break;
1205         case 4:
1206             ret = decode_f(metadata);
1207             break;
1208         default:
1209             ret = CL_EFORMAT;
1210             break;
1211     }
1212     return ret;
1213 }
1214