1 /* Copyright 2015 the unarr project authors (see AUTHORS file).
2    License: LGPLv3 */
3 
4 /* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */
5 
6 #include "rar.h"
7 
/* Returns the single byte stored at data. */
static inline uint8_t uint8le(unsigned char *data)
{
    return *data;
}
/* Combines the two bytes at data into a 16-bit value, least significant
   byte first. */
static inline uint16_t uint16le(unsigned char *data)
{
    unsigned int low = data[0];
    unsigned int high = data[1];

    return (uint16_t)(low | (high << 8));
}
/* Combines the four bytes at data into a 32-bit value, least significant
   byte first.
   Every byte is widened to uint32_t before it is shifted: a bare
   data[3] << 24 operates on a promoted (signed) int and shifts into the
   sign bit for bytes >= 0x80, which is undefined behaviour. */
static inline uint32_t uint32le(const unsigned char *data)
{
    return (uint32_t)data[0]
         | (uint32_t)data[1] << 8
         | (uint32_t)data[2] << 16
         | (uint32_t)data[3] << 24;
}
11 
/* Reads the 7-byte block header at the current stream position into *header
   (crc, type, flags, size). When LHD_LONG_BLOCK is set, or the block type is
   0x74 (logged as a file header if the flag is missing), the following
   4 bytes are read as well and stored as header->datasize; otherwise
   datasize is 0.
   Returns false when the header could not be read completely or when the
   declared header size is smaller than the number of bytes consumed here.
   If not a single byte could be read, ar->at_eof is set as well. */
bool rar_parse_header(ar_archive *ar, struct rar_header *header)
{
    unsigned char base[7];
    bool has_long_flag;
    size_t consumed = ar_read(ar->stream, base, sizeof(base));

    if (consumed < sizeof(base)) {
        /* an entirely empty read means the archive has ended */
        if (consumed == 0)
            ar->at_eof = true;
        return false;
    }

    header->crc = uint16le(&base[0]);
    header->type = uint8le(&base[2]);
    header->flags = uint16le(&base[3]);
    header->size = uint16le(&base[5]);
    header->datasize = 0;

    has_long_flag = (header->flags & LHD_LONG_BLOCK) != 0;
    if (has_long_flag || header->type == 0x74) {
        unsigned char extra[4];
        if (!has_long_flag)
            log("File header without LHD_LONG_BLOCK set");
        consumed += ar_read(ar->stream, extra, sizeof(extra));
        if (consumed < sizeof(base) + sizeof(extra))
            return false;
        header->datasize = uint32le(extra);
    }

    /* the header must be at least as large as what was just read from it */
    if (header->size >= consumed)
        return true;

    warn("Invalid header size %d", header->size);
    return false;
}
46 
/* Seeks to ar->entry_offset, re-reads the block header found there and
   compares its stored 16-bit CRC (first two bytes) with the low 16 bits of
   a CRC-32 computed over all remaining header bytes.
   Returns false if seeking or reading fails, if the declared header size is
   below 7 bytes, or if the checksums differ. */
bool rar_check_header_crc(ar_archive *ar)
{
    unsigned char chunk[256];
    uint16_t stored_crc, remaining;
    uint32_t crc;

    if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
        return false;
    if (ar_read(ar->stream, chunk, 7) != 7)
        return false;

    stored_crc = uint16le(chunk);
    remaining = uint16le(chunk + 5);
    if (remaining < 7)
        return false;
    /* the first 7 bytes have already been consumed */
    remaining -= 7;

    /* the CRC field itself (bytes 0-1) is not part of the checksum */
    crc = ar_crc32(0, chunk + 2, 5);
    while (remaining > 0) {
        size_t want = smin(remaining, sizeof(chunk));
        if (ar_read(ar->stream, chunk, want) != want)
            return false;
        crc = ar_crc32(crc, chunk, want);
        remaining -= (uint16_t)want;
    }

    return (crc & 0xFFFF) == stored_crc;
}
73 
/* Parses the file-entry fields that follow the block header: 21 fixed bytes
   (size, OS, CRC, DOS date, version, method, name length, attributes),
   optionally 8 more bytes when LHD_LARGE is set, then skips the file name
   and, when LHD_SALT is set, the 8-byte salt.
   On success the version, method, CRC, header size and solid flag are
   copied into rar->entry and any cached entry name is freed and reset.
   Returns false if any read or skip fails; rar->entry is left untouched in
   that case. */
bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry)
{
    unsigned char data[21];
    if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data))
        return false;

    entry->size = uint32le(data + 0);
    entry->os = uint8le(data + 4);
    entry->crc = uint32le(data + 5);
    entry->dosdate = uint32le(data + 9);
    entry->version = uint8le(data + 13);
    entry->method = uint8le(data + 14);
    entry->namelen = uint16le(data + 15);
    entry->attrs = uint32le(data + 17);
    if ((header->flags & LHD_LARGE)) {
        unsigned char more_data[8];
        if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data))
            return false;
        /* these two fields carry the HIGH 32 bits of the packed and
           unpacked sizes, so they have to be shifted above the low halves
           read earlier instead of being added to them directly */
        header->datasize += (uint64_t)uint32le(more_data + 0) << 32;
        entry->size += (uint64_t)uint32le(more_data + 4) << 32;
    }
    if (!ar_skip(rar->super.stream, entry->namelen))
        return false;
    if ((header->flags & LHD_SALT)) {
        log("Skipping LHD_SALT");
        /* a failed skip leaves the stream mid-header; report it like the
           other short reads instead of silently continuing */
        if (!ar_skip(rar->super.stream, 8))
            return false;
    }

    rar->entry.version = entry->version;
    rar->entry.method = entry->method;
    rar->entry.crc = entry->crc;
    rar->entry.header_size = header->size;
    /* entries with version < 20 take the solid flag from the archive-level
       flags, later versions from the entry's own header flags */
    rar->entry.solid = entry->version < 20 ? (rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID);
    /* the name is re-read lazily by rar_get_name() */
    free(rar->entry.name);
    rar->entry.name = NULL;

    return true;
}
112 
/* Converts what RAR considers a "Unicode" file name into UTF-8.
   data holds len bytes: a NUL-terminated single-byte name followed by the
   encoded form, which consists of one "high byte" and then flag bytes (four
   2-bit opcodes each) interleaved with their operands:
     0 - one byte, used as the code unit as-is
     1 - one byte, combined with the high byte
     2 - two bytes, a little-endian 16-bit code unit
     3 - a run length; characters are taken from the single-byte name at the
         current output position, optionally offset by a correction byte
         (modulo 256) and combined with the high byte
   data must be NUL-terminated at or before data[len].
   When at most one byte follows the single-byte name, the raw bytes are
   returned unchanged.
   Returns a newly allocated, NUL-terminated string owned by the caller, or
   NULL on allocation failure or truncated/inconsistent input. */
static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len)
{
#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0)

    /* bytes are always handled as unsigned so that values >= 0x80 are not
       sign-extended where char is signed */
    const uint8_t *name = (const uint8_t *)data;
    const uint8_t *in = name + strlen(data) + 1;
    const uint8_t *end_in = name + len;
    uint8_t highbyte, flagbyte, flagbits, length, i;
    /* number of characters decoded so far, which is also the index into the
       single-byte name; len is 16 bits wide, so an 8-bit counter would wrap */
    size_t size;
    char *str, *out, *end_out;

    /* up to 3 bytes of UTF-8 per input byte, plus a terminator */
    str = calloc((size_t)len + 1, 3);
    if (!str)
        return NULL;
    out = str;
    end_out = str + (size_t)len * 3;

    if (end_in - in <= 1) {
        memcpy(str, data, len);
        return str;
    }

    highbyte = *in++;
    flagbyte = 0;
    flagbits = 0;
    size = 0;

    while (in < end_in && out < end_out) {
        if (flagbits == 0) {
            flagbyte = *in++;
            flagbits = 8;
        }
        flagbits -= 2;
        switch ((flagbyte >> flagbits) & 3) {
        case 0:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(*in++, out, end_out - out);
            size++;
            break;
        case 1:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out);
            size++;
            break;
        case 2:
            Check(in + 2 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out);
            in += 2;
            size++;
            break;
        case 3:
            Check(in + 1 <= end_in);
            length = *in++;
            if ((length & 0x80)) {
                uint8_t correction;
                /* the correction byte must still lie within the input */
                Check(in + 1 <= end_in);
                correction = *in++;
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    /* the sum is reduced modulo 256 so that it cannot spill
                       into the bits reserved for the high byte */
                    out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | (uint8_t)(name[size] + correction), out, end_out - out);
                    size++;
                }
            }
            else {
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    out += ar_conv_rune_to_utf8(name[size], out, end_out - out);
                    size++;
                }
            }
            break;
        }
    }

    return str;

#undef Check
}
186 
/* Returns the UTF-8 name of the current entry with backslashes replaced by
   forward slashes. The name is read from the entry header at
   ar->entry_offset on first use and cached in rar->entry.name; later calls
   return the cached pointer. Afterwards the stream is repositioned to the
   end of the entry header (a failure to do so is only reported through
   warn). Returns NULL if the header can't be re-read, memory is exhausted
   or the name can't be converted. */
const char *rar_get_name(ar_archive *ar)
{
    ar_archive_rar *rar = (ar_archive_rar *)ar;
    struct rar_header header;
    unsigned char fixed[21];
    uint16_t namelen;
    bool is_unicode;
    char *raw, *ch;

    if (rar->entry.name)
        return rar->entry.name;

    /* walk through the header again up to where the name starts */
    if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
        return NULL;
    if (!rar_parse_header(ar, &header))
        return NULL;
    if (ar_read(ar->stream, fixed, sizeof(fixed)) != sizeof(fixed))
        return NULL;
    if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8))
        return NULL;

    namelen = uint16le(fixed + 15);
    raw = malloc(namelen + 1);
    if (!raw || ar_read(ar->stream, raw, namelen) != namelen) {
        free(raw);
        return NULL;
    }
    raw[namelen] = '\0';

    is_unicode = (header.flags & LHD_UNICODE) != 0;
    if (is_unicode && namelen == strlen(raw)) {
        /* no embedded NUL, hence no encoded part: the bytes are kept as
           they are and the buffer's ownership moves to the entry */
        rar->entry.name = raw;
    }
    else {
        if (is_unicode)
            rar->entry.name = rar_conv_unicode_to_utf8(raw, namelen);
        else
            rar->entry.name = ar_conv_dos_to_utf8(raw);
        free(raw);
    }

    /* normalize path separators */
    if (rar->entry.name) {
        for (ch = rar->entry.name; *ch != '\0'; ch++) {
            if (*ch == '\\')
                *ch = '/';
        }
    }

    if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET))
        warn("Couldn't seek back to the end of the entry header");

    return rar->entry.name;
}
237