1 /* Copyright 2015 the unarr project authors (see AUTHORS file).
2 License: LGPLv3 */
3
4 /* adapted from https://code.google.com/p/theunarchiver/source/browse/XADMaster/XADRARParser.m */
5
6 #include "rar.h"
7
/* Returns the single byte stored at the start of data. */
static inline uint8_t uint8le(unsigned char *data)
{
    return *data;
}
/* Assembles a little-endian 16-bit value from the first two bytes of data. */
static inline uint16_t uint16le(unsigned char *data)
{
    int low = data[0];
    int high = data[1] << 8;

    return high | low;
}
/*
 * Assembles a little-endian 32-bit value from the first four bytes of data.
 *
 * Every byte is widened to uint32_t before it is shifted: an unsigned char
 * is otherwise promoted to (signed) int, and shifting a byte >= 0x80 left by
 * 24 bits would then overflow into the sign bit, which is undefined.
 */
static inline uint32_t uint32le(unsigned char *data)
{
    return (uint32_t)data[0]
        | (uint32_t)data[1] << 8
        | (uint32_t)data[2] << 16
        | (uint32_t)data[3] << 24;
}
11
/*
 * Reads the 7-byte common block header at the current stream position into
 * header (crc, type, flags, size) and, when the block has LHD_LONG_BLOCK set
 * or is of type 0x74, the 4-byte data size that follows it.
 *
 * Returns true on success. Returns false on a short read or when the
 * declared header size is smaller than the number of bytes consumed here;
 * if nothing at all could be read, ar->at_eof is set as well.
 */
bool rar_parse_header(ar_archive *ar, struct rar_header *header)
{
    unsigned char base[7];
    unsigned char extra[4];
    bool long_block;
    size_t consumed = ar_read(ar->stream, base, sizeof(base));

    if (consumed < sizeof(base)) {
        if (consumed == 0)
            ar->at_eof = true;
        return false;
    }

    header->crc = uint16le(base);
    header->type = uint8le(base + 2);
    header->flags = uint16le(base + 3);
    header->size = uint16le(base + 5);
    header->datasize = 0;

    long_block = (header->flags & LHD_LONG_BLOCK) != 0;
    if (long_block || header->type == 0x74) {
        /* type 0x74 blocks get the size field read even without the flag */
        if (!long_block) {
            log("File header without LHD_LONG_BLOCK set");
        }
        consumed += ar_read(ar->stream, extra, sizeof(extra));
        if (consumed < sizeof(base) + sizeof(extra))
            return false;
        header->datasize = uint32le(extra);
    }

    /* the header must be at least as long as what has been read so far */
    if (header->size >= consumed)
        return true;

    warn("Invalid header size %d", header->size);
    return false;
}
46
/*
 * Re-reads the header of the current entry (starting at ar->entry_offset)
 * and verifies its checksum: ar_crc32 is run over every header byte after
 * the leading 2-byte CRC field, and the low 16 bits of the result are
 * compared with that stored field.
 *
 * Returns false when seeking or reading fails, when the declared header
 * size is below the 7-byte minimum, or when the checksums differ.
 */
bool rar_check_header_crc(ar_archive *ar)
{
    unsigned char chunk[256];
    uint16_t expected, remaining;
    uint32_t crc;

    if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
        return false;
    if (ar_read(ar->stream, chunk, 7) != 7)
        return false;

    expected = uint16le(chunk);
    remaining = uint16le(chunk + 5);
    if (remaining < 7)
        return false;

    /* the first 7 bytes are already in the buffer; skip the CRC field itself */
    crc = ar_crc32(0, chunk + 2, 5);
    remaining -= 7;

    /* feed the rest of the header through the checksum one buffer at a time */
    while (remaining != 0) {
        size_t step = smin(remaining, sizeof(chunk));
        if (ar_read(ar->stream, chunk, step) != step)
            return false;
        crc = ar_crc32(crc, chunk, step);
        remaining -= (uint16_t)step;
    }

    return (crc & 0xFFFF) == expected;
}
73
/*
 * Parses the file-header fields that follow the common block header already
 * read into header, filling entry and resetting the per-entry state kept in
 * rar (version, method, crc, header size, solid flag, cached name).
 *
 * The stream is advanced past the fixed 21-byte part, the optional 8-byte
 * LHD_LARGE extension, the file name and the optional 8-byte salt.
 *
 * Returns false if any of those reads or skips fails.
 */
bool rar_parse_header_entry(ar_archive_rar *rar, struct rar_header *header, struct rar_entry *entry)
{
    unsigned char data[21];
    if (ar_read(rar->super.stream, data, sizeof(data)) != sizeof(data))
        return false;

    entry->size = uint32le(data + 0);
    entry->os = uint8le(data + 4);
    entry->crc = uint32le(data + 5);
    entry->dosdate = uint32le(data + 9);
    entry->version = uint8le(data + 13);
    entry->method = uint8le(data + 14);
    entry->namelen = uint16le(data + 15);
    entry->attrs = uint32le(data + 17);
    if ((header->flags & LHD_LARGE)) {
        unsigned char more_data[8];
        if (ar_read(rar->super.stream, more_data, sizeof(more_data)) != sizeof(more_data))
            return false;
        /* These two words are the HIGH 32 bits of the packed and unpacked
         * sizes, so they must be shifted above the low words read earlier
         * (assumes datasize and size are 64-bit, as the casts indicate). */
        header->datasize += (uint64_t)uint32le(more_data + 0) << 32;
        entry->size += (uint64_t)uint32le(more_data + 4) << 32;
    }
    if (!ar_skip(rar->super.stream, entry->namelen))
        return false;
    if ((header->flags & LHD_SALT)) {
        log("Skipping LHD_SALT");
        /* a header truncated inside the salt is as invalid as one truncated
         * inside the name */
        if (!ar_skip(rar->super.stream, 8))
            return false;
    }

    rar->entry.version = entry->version;
    rar->entry.method = entry->method;
    rar->entry.crc = entry->crc;
    rar->entry.header_size = header->size;
    /* before version 20 solidity comes from the archive flags rather than
     * from the individual file header */
    rar->entry.solid = entry->version < 20 ? (rar->archive_flags & MHD_SOLID) : (header->flags & LHD_SOLID);
    /* drop the previous entry's name; rar_get_name() re-reads it on demand */
    free(rar->entry.name);
    rar->entry.name = NULL;

    return true;
}
112
/*
 * this seems to be what RAR considers "Unicode"
 *
 * data holds len bytes: a NUL-terminated single-byte name followed by an
 * encoded form of the same name. The encoded part starts with a "high byte",
 * then alternates flag bytes and payload. Each flag byte carries four 2-bit
 * opcodes, consumed from the most significant pair downwards:
 *   0: next byte is the code point
 *   1: next byte is the low byte, combined with the high byte
 *   2: next two bytes are a little-endian 16-bit code point
 *   3: next byte is a run length; (length & 0x7F) + 2 characters are taken
 *      from the single-byte name at the current character index. If bit 7
 *      is set, a correction byte follows which is added to each of them and
 *      the result is combined with the high byte.
 *
 * Returns a newly allocated, NUL-terminated UTF-8 string which the caller
 * must free, or NULL on allocation failure or malformed input. If there is
 * no encoded part, the raw bytes are copied unchanged.
 */
static char *rar_conv_unicode_to_utf8(const char *data, uint16_t len)
{
#define Check(cond) if (!(cond)) { free(str); return NULL; } else ((void)0)

    /* always look at the name as unsigned bytes: plain char may be signed,
     * which would turn bytes >= 0x80 into negative code points */
    const uint8_t *bytes = (const uint8_t *)data;
    uint8_t highbyte, flagbyte, flagbits, length, i;
    /* index of the next output character; names can be longer than 255
     * characters, so this must not be an 8-bit counter */
    size_t size;
    const uint8_t *in = bytes + strlen(data) + 1;
    const uint8_t *end_in = bytes + len;
    /* worst case is three UTF-8 bytes per input byte, plus the terminator */
    char *str = calloc(len + 1, 3);
    char *out;
    char *end_out;

    if (!str)
        return NULL;
    out = str;
    end_out = str + len * 3;

    if (end_in - in <= 1) {
        memcpy(str, data, len);
        return str;
    }

    highbyte = *in++;
    flagbyte = 0;
    flagbits = 0;
    size = 0;

    while (in < end_in && out < end_out) {
        if (flagbits == 0) {
            flagbyte = *in++;
            flagbits = 8;
        }
        flagbits -= 2;
        switch ((flagbyte >> flagbits) & 3) {
        case 0:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(*in++, out, end_out - out);
            size++;
            break;
        case 1:
            Check(in + 1 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | *in++, out, end_out - out);
            size++;
            break;
        case 2:
            Check(in + 2 <= end_in);
            out += ar_conv_rune_to_utf8(((uint16_t)*(in + 1) << 8) | *in, out, end_out - out);
            in += 2;
            size++;
            break;
        case 3:
            Check(in + 1 <= end_in);
            length = *in++;
            if ((length & 0x80)) {
                uint8_t correction;
                /* the correction byte needs its own bounds check */
                Check(in + 1 <= end_in);
                correction = *in++;
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    /* the corrected low byte wraps around at 8 bits so it
                     * can never spill into the high byte */
                    out += ar_conv_rune_to_utf8(((uint16_t)highbyte << 8) | (uint8_t)(bytes[size] + correction), out, end_out - out);
                    size++;
                }
            }
            else {
                for (i = 0; i < (length & 0x7F) + 2; i++) {
                    Check(size < len);
                    out += ar_conv_rune_to_utf8(bytes[size], out, end_out - out);
                    size++;
                }
            }
            break;
        }
    }

    return str;

#undef Check
}
186
/*
 * Returns the name of the current entry as a string owned by the archive,
 * or NULL if it cannot be determined. The name is read lazily: on the first
 * call for an entry the header at ar->entry_offset is parsed again, the raw
 * name is read and converted, and the result is cached in rar->entry.name.
 *
 * Conversion depends on the header flags: without LHD_UNICODE the raw name
 * goes through ar_conv_dos_to_utf8; with LHD_UNICODE it is used as-is when
 * it contains no embedded NUL, and decoded with rar_conv_unicode_to_utf8
 * otherwise. Backslashes are then replaced with forward slashes.
 *
 * Afterwards the stream is repositioned to the end of the entry header;
 * a failure to do so is only reported as a warning.
 */
const char *rar_get_name(ar_archive *ar)
{
    ar_archive_rar *rar = (ar_archive_rar *)ar;
    struct rar_header header;
    unsigned char fixed[21];
    uint16_t namelen;
    char *raw;
    char *sep;

    /* already resolved for this entry */
    if (rar->entry.name)
        return rar->entry.name;

    if (!ar_seek(ar->stream, ar->entry_offset, SEEK_SET))
        return NULL;
    if (!rar_parse_header(ar, &header))
        return NULL;
    if (ar_read(ar->stream, fixed, sizeof(fixed)) != sizeof(fixed))
        return NULL;
    /* step over the 8 extra size bytes present in large-file headers */
    if ((header.flags & LHD_LARGE) && !ar_skip(ar->stream, 8))
        return NULL;

    namelen = uint16le(fixed + 15);
    raw = malloc(namelen + 1);
    if (!raw || ar_read(ar->stream, raw, namelen) != namelen) {
        free(raw);
        return NULL;
    }
    raw[namelen] = '\0';

    if ((header.flags & LHD_UNICODE)) {
        if (strlen(raw) == namelen) {
            /* no embedded NUL, so there is no encoded part: keep the buffer */
            rar->entry.name = raw;
        }
        else {
            rar->entry.name = rar_conv_unicode_to_utf8(raw, namelen);
            free(raw);
        }
    }
    else {
        rar->entry.name = ar_conv_dos_to_utf8(raw);
        free(raw);
    }

    /* normalize path separators */
    if (rar->entry.name) {
        for (sep = strchr(rar->entry.name, '\\'); sep != NULL; sep = strchr(sep, '\\'))
            *sep = '/';
    }

    if (!ar_seek(ar->stream, ar->entry_offset + rar->entry.header_size, SEEK_SET)) {
        warn("Couldn't seek back to the end of the entry header");
    }

    return rar->entry.name;
}
237