1 /*-
2 * Copyright 2016 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "config.h"
18 #include "message.h"
19 #include "task.h"
20 #include "archives.h"
21 #include "libmime/mime_encoding.h"
22 #include <unicode/uchar.h>
23 #include <unicode/utf8.h>
24 #include <unicode/utf16.h>
25 #include <unicode/ucnv.h>
26
27 #define msg_debug_archive(...) rspamd_conditional_debug_fast (NULL, NULL, \
28 rspamd_archive_log_id, "archive", task->task_pool->tag.uid, \
29 G_STRFUNC, \
30 __VA_ARGS__)
31
INIT_LOG_MODULE(archive)32 INIT_LOG_MODULE(archive)
33
34 static void
35 rspamd_archive_dtor (gpointer p)
36 {
37 struct rspamd_archive *arch = p;
38 struct rspamd_archive_file *f;
39 guint i;
40
41 for (i = 0; i < arch->files->len; i ++) {
42 f = g_ptr_array_index (arch->files, i);
43
44 if (f->fname) {
45 g_string_free (f->fname, TRUE);
46 }
47
48 g_free (f);
49 }
50
51 g_ptr_array_free (arch->files, TRUE);
52 }
53
54 static GString *
rspamd_archive_file_try_utf(struct rspamd_task * task,const gchar * in,gsize inlen)55 rspamd_archive_file_try_utf (struct rspamd_task *task,
56 const gchar *in, gsize inlen)
57 {
58 const gchar *charset = NULL, *p, *end;
59 GString *res;
60
61 charset = rspamd_mime_charset_find_by_content (in, inlen, TRUE);
62
63 if (charset) {
64 UChar *tmp;
65 UErrorCode uc_err = U_ZERO_ERROR;
66 gint32 r, clen, dlen;
67 struct rspamd_charset_converter *conv;
68 UConverter *utf8_converter;
69
70 conv = rspamd_mime_get_converter_cached (charset, task->task_pool,
71 TRUE, &uc_err);
72 utf8_converter = rspamd_get_utf8_converter ();
73
74 if (conv == NULL) {
75 msg_info_task ("cannot open converter for %s: %s",
76 charset, u_errorName (uc_err));
77
78 return NULL;
79 }
80
81 tmp = g_malloc (sizeof (*tmp) * (inlen + 1));
82 r = rspamd_converter_to_uchars (conv, tmp, inlen + 1,
83 in, inlen, &uc_err);
84 if (!U_SUCCESS (uc_err)) {
85 msg_info_task ("cannot convert data to unicode from %s: %s",
86 charset, u_errorName (uc_err));
87 g_free (tmp);
88
89 return NULL;
90 }
91
92 clen = ucnv_getMaxCharSize (utf8_converter);
93 dlen = UCNV_GET_MAX_BYTES_FOR_STRING (r, clen);
94 res = g_string_sized_new (dlen);
95 r = ucnv_fromUChars (utf8_converter, res->str, dlen, tmp, r, &uc_err);
96
97 if (!U_SUCCESS (uc_err)) {
98 msg_info_task ("cannot convert data from unicode from %s: %s",
99 charset, u_errorName (uc_err));
100 g_free (tmp);
101 g_string_free (res, TRUE);
102
103 return NULL;
104 }
105
106 g_free (tmp);
107 res->len = r;
108
109 msg_debug_archive ("converted from %s to UTF-8 inlen: %z, outlen: %d",
110 charset, inlen, r);
111 }
112 else {
113 /* Convert unsafe characters to '?' */
114 res = g_string_sized_new (inlen);
115 p = in;
116 end = in + inlen;
117
118 while (p < end) {
119 if (g_ascii_isgraph (*p)) {
120 g_string_append_c (res, *p);
121 }
122 else {
123 g_string_append_c (res, '?');
124 }
125
126 p ++;
127 }
128 }
129
130 return res;
131 }
132
133 static void
rspamd_archive_process_zip(struct rspamd_task * task,struct rspamd_mime_part * part)134 rspamd_archive_process_zip (struct rspamd_task *task,
135 struct rspamd_mime_part *part)
136 {
137 const guchar *p, *start, *end, *eocd = NULL, *cd;
138 const guint32 eocd_magic = 0x06054b50, cd_basic_len = 46;
139 const guchar cd_magic[] = {0x50, 0x4b, 0x01, 0x02};
140 const guint max_processed = 1024;
141 guint32 cd_offset, cd_size, comp_size, uncomp_size, processed = 0;
142 guint16 extra_len, fname_len, comment_len;
143 struct rspamd_archive *arch;
144 struct rspamd_archive_file *f = NULL;
145
146 /* Zip files have interesting data at the end of archive */
147 p = part->parsed_data.begin + part->parsed_data.len - 1;
148 start = part->parsed_data.begin;
149 end = p;
150
151 /* Search for EOCD:
152 * 22 bytes is a typical size of eocd without a comment and
153 * end points one byte after the last character
154 */
155 p -= 21;
156
157 while (p > start + sizeof (guint32)) {
158 guint32 t;
159
160 if (processed > max_processed) {
161 break;
162 }
163
164 /* XXX: not an efficient approach */
165 memcpy (&t, p, sizeof (t));
166
167 if (GUINT32_FROM_LE (t) == eocd_magic) {
168 eocd = p;
169 break;
170 }
171
172 p --;
173 processed ++;
174 }
175
176
177 if (eocd == NULL) {
178 /* Not a zip file */
179 msg_info_task ("zip archive is invalid (no EOCD)");
180
181 return;
182 }
183
184 if (end - eocd < 21) {
185 msg_info_task ("zip archive is invalid (short EOCD)");
186
187 return;
188 }
189
190
191 memcpy (&cd_size, eocd + 12, sizeof (cd_size));
192 cd_size = GUINT32_FROM_LE (cd_size);
193 memcpy (&cd_offset, eocd + 16, sizeof (cd_offset));
194 cd_offset = GUINT32_FROM_LE (cd_offset);
195
196 /* We need to check sanity as well */
197 if (cd_offset + cd_size > (guint)(eocd - start)) {
198 msg_info_task ("zip archive is invalid (bad size/offset for CD)");
199
200 return;
201 }
202
203 cd = start + cd_offset;
204
205 arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
206 arch->files = g_ptr_array_new ();
207 arch->type = RSPAMD_ARCHIVE_ZIP;
208 rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
209 arch);
210
211 while (cd < start + cd_offset + cd_size) {
212 guint16 flags;
213
214 /* Read central directory record */
215 if (eocd - cd < cd_basic_len ||
216 memcmp (cd, cd_magic, sizeof (cd_magic)) != 0) {
217 msg_info_task ("zip archive is invalid (bad cd record)");
218
219 return;
220 }
221
222 memcpy (&flags, cd + 8, sizeof (guint16));
223 flags = GUINT16_FROM_LE (flags);
224 memcpy (&comp_size, cd + 20, sizeof (guint32));
225 comp_size = GUINT32_FROM_LE (comp_size);
226 memcpy (&uncomp_size, cd + 24, sizeof (guint32));
227 uncomp_size = GUINT32_FROM_LE (uncomp_size);
228 memcpy (&fname_len, cd + 28, sizeof (fname_len));
229 fname_len = GUINT16_FROM_LE (fname_len);
230 memcpy (&extra_len, cd + 30, sizeof (extra_len));
231 extra_len = GUINT16_FROM_LE (extra_len);
232 memcpy (&comment_len, cd + 32, sizeof (comment_len));
233 comment_len = GUINT16_FROM_LE (comment_len);
234
235 if (cd + fname_len + comment_len + extra_len + cd_basic_len > eocd) {
236 msg_info_task ("zip archive is invalid (too large cd record)");
237
238 return;
239 }
240
241 f = g_malloc0 (sizeof (*f));
242 f->fname = rspamd_archive_file_try_utf (task,
243 cd + cd_basic_len, fname_len);
244 f->compressed_size = comp_size;
245 f->uncompressed_size = uncomp_size;
246
247 if (flags & 0x41u) {
248 f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
249 }
250
251 if (f->fname) {
252 g_ptr_array_add (arch->files, f);
253 msg_debug_archive ("found file in zip archive: %v", f->fname);
254 }
255 else {
256 g_free (f);
257
258 return;
259 }
260
261 /* Process extra fields */
262 const guchar *extra = cd + fname_len + cd_basic_len;
263 p = extra;
264
265 while (p + sizeof (guint16) * 2 < extra + extra_len) {
266 guint16 hid, hlen;
267
268 memcpy (&hid, p, sizeof (guint16));
269 hid = GUINT16_FROM_LE (hid);
270 memcpy (&hlen, p + sizeof (guint16), sizeof (guint16));
271 hlen = GUINT16_FROM_LE (hlen);
272
273 if (hid == 0x0017) {
274 f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
275 }
276
277 p += hlen + sizeof (guint16) * 2;
278 }
279
280 cd += fname_len + comment_len + extra_len + cd_basic_len;
281 }
282
283 part->part_type = RSPAMD_MIME_PART_ARCHIVE;
284 part->specific.arch = arch;
285
286 if (part->cd) {
287 arch->archive_name = &part->cd->filename;
288 }
289
290 arch->size = part->parsed_data.len;
291 }
292
293 static inline gint
rspamd_archive_rar_read_vint(const guchar * start,gsize remain,guint64 * res)294 rspamd_archive_rar_read_vint (const guchar *start, gsize remain, guint64 *res)
295 {
296 /*
297 * From http://www.rarlab.com/technote.htm:
298 * Variable length integer. Can include one or more bytes, where
299 * lower 7 bits of every byte contain integer data and highest bit
300 * in every byte is the continuation flag.
301 * If highest bit is 0, this is the last byte in sequence.
302 * So first byte contains 7 least significant bits of integer and
303 * continuation flag. Second byte, if present, contains next 7 bits and so on.
304 */
305 guint64 t = 0;
306 guint shift = 0;
307 const guchar *p = start;
308
309 while (remain > 0 && shift <= 57) {
310 if (*p & 0x80) {
311 t |= ((guint64)(*p & 0x7f)) << shift;
312 }
313 else {
314 t |= ((guint64)(*p & 0x7f)) << shift;
315 p ++;
316 break;
317 }
318
319 shift += 7;
320 p++;
321 remain --;
322 }
323
324 if (remain == 0 || shift > 64) {
325 return -1;
326 }
327
328 *res = GUINT64_FROM_LE (t);
329
330 return p - start;
331 }
332
/*
 * Cursor helpers for the RAR parsers. They operate on variables of the
 * enclosing function: `p` (read position), `end` (upper bound of the data)
 * and `task` (for logging); the vint readers also use `r` and `vint`.
 * On malformed input they log and execute a plain `return`, so they can
 * only be used inside functions returning void.
 */

/* Advance p by n bytes; n must be positive and must not run past end */
#define RAR_SKIP_BYTES(n) do { \
	if ((n) <= 0) { \
		msg_debug_archive ("rar archive is invalid (bad skip value)"); \
		return; \
	} \
	else if ((gsize)(end - p) < (n)) { \
		msg_debug_archive ("rar archive is invalid (truncated)"); \
		return; \
	} \
	p += (n); \
} while (0)

/* Decode a vint at p into `vint` (length in `r`) without moving p */
#define RAR_READ_VINT() do { \
	r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_archive ("rar archive is invalid (bad vint)"); \
		return; \
	} \
	if (r == 0) { \
		msg_debug_archive ("rar archive is invalid (BAD vint offset)"); \
		return; \
	} \
} while (0)

/* Decode a vint at p into `vint` and move p past it */
#define RAR_READ_VINT_SKIP() do { \
	r = rspamd_archive_rar_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_archive ("rar archive is invalid (bad vint)"); \
		return; \
	} \
	p += r; \
} while (0)

/* Read a little endian 16 bit value into n and move p past it */
#define RAR_READ_UINT16(n) do { \
	if (end - p < (glong)sizeof (guint16)) { \
		msg_debug_archive ("rar archive is invalid (bad int16)"); \
		return; \
	} \
	n = p[0] | (p[1] << 8); \
	p += sizeof (guint16); \
} while (0)

/* Read a little endian 32 bit value into n and move p past it */
#define RAR_READ_UINT32(n) do { \
	if (end - p < (glong)sizeof (guint32)) { \
		msg_debug_archive ("rar archive is invalid (bad int32)"); \
		return; \
	} \
	n = (guint)p[0] | ((guint)p[1] << 8) | ((guint)p[2] << 16) | ((guint)p[3] << 24); \
	p += sizeof (guint32); \
} while (0)
383
384 static void
rspamd_archive_process_rar_v4(struct rspamd_task * task,const guchar * start,const guchar * end,struct rspamd_mime_part * part)385 rspamd_archive_process_rar_v4 (struct rspamd_task *task, const guchar *start,
386 const guchar *end, struct rspamd_mime_part *part)
387 {
388 const guchar *p = start, *start_section;
389 guint8 type;
390 guint flags;
391 guint64 sz, comp_sz = 0, uncomp_sz = 0;
392 struct rspamd_archive *arch;
393 struct rspamd_archive_file *f;
394
395 arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
396 arch->files = g_ptr_array_new ();
397 arch->type = RSPAMD_ARCHIVE_RAR;
398 rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
399 arch);
400
401 while (p < end) {
402 /* Crc16 */
403 start_section = p;
404 RAR_SKIP_BYTES (sizeof (guint16));
405 type = *p;
406 p ++;
407 RAR_READ_UINT16 (flags);
408
409 if (type == 0x73) {
410 /* Main header, check for encryption */
411 if (flags & 0x80) {
412 arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
413 goto end;
414 }
415 }
416
417 RAR_READ_UINT16 (sz);
418
419 if (flags & 0x8000) {
420 /* We also need to read ADD_SIZE element */
421 guint32 tmp;
422
423 RAR_READ_UINT32 (tmp);
424 sz += tmp;
425 /* This is also used as PACK_SIZE */
426 comp_sz = tmp;
427 }
428
429 if (sz == 0) {
430 /* Zero sized block - error */
431 msg_debug_archive ("rar archive is invalid (zero size block)");
432
433 return;
434 }
435
436 if (type == 0x74) {
437 guint fname_len;
438
439 /* File header */
440 /* Uncompressed size */
441 RAR_READ_UINT32 (uncomp_sz);
442 /* Skip to NAME_SIZE element */
443 RAR_SKIP_BYTES (11);
444 RAR_READ_UINT16 (fname_len);
445
446 if (fname_len == 0 || fname_len > (gsize)(end - p)) {
447 msg_debug_archive ("rar archive is invalid (bad filename size: %d)",
448 fname_len);
449
450 return;
451 }
452
453 /* Attrs */
454 RAR_SKIP_BYTES (4);
455
456 if (flags & 0x100) {
457 /* We also need to read HIGH_PACK_SIZE */
458 guint32 tmp;
459
460 RAR_READ_UINT32 (tmp);
461 sz += tmp;
462 comp_sz += tmp;
463 /* HIGH_UNP_SIZE */
464 RAR_READ_UINT32 (tmp);
465 uncomp_sz += tmp;
466 }
467
468 f = g_malloc0 (sizeof (*f));
469
470 if (flags & 0x200) {
471 /* We have unicode + normal version */
472 guchar *tmp;
473
474 tmp = memchr (p, '\0', fname_len);
475
476 if (tmp != NULL) {
477 /* Just use ASCII version */
478 f->fname = rspamd_archive_file_try_utf (task, p, tmp - p);
479 msg_debug_archive ("found ascii filename in rarv4 archive: %v",
480 f->fname);
481 }
482 else {
483 /* We have UTF8 filename, use it as is */
484 f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
485 msg_debug_archive ("found utf filename in rarv4 archive: %v",
486 f->fname);
487 }
488 }
489 else {
490 f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
491 msg_debug_archive ("found ascii (old) filename in rarv4 archive: %v",
492 f->fname);
493 }
494
495 f->compressed_size = comp_sz;
496 f->uncompressed_size = uncomp_sz;
497
498 if (flags & 0x4) {
499 f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
500 }
501
502 if (f->fname) {
503 g_ptr_array_add (arch->files, f);
504 }
505 else {
506 g_free (f);
507 }
508 }
509
510 p = start_section;
511 RAR_SKIP_BYTES (sz);
512 }
513
514 end:
515 part->part_type = RSPAMD_MIME_PART_ARCHIVE;
516 part->specific.arch = arch;
517 arch->archive_name = &part->cd->filename;
518 arch->size = part->parsed_data.len;
519 }
520
521 static void
rspamd_archive_process_rar(struct rspamd_task * task,struct rspamd_mime_part * part)522 rspamd_archive_process_rar (struct rspamd_task *task,
523 struct rspamd_mime_part *part)
524 {
525 const guchar *p, *end, *section_start;
526 const guchar rar_v5_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00},
527 rar_v4_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00};
528 const guint rar_encrypted_header = 4, rar_main_header = 1,
529 rar_file_header = 2;
530 guint64 vint, sz, comp_sz = 0, uncomp_sz = 0, flags = 0, type = 0,
531 extra_sz = 0;
532 struct rspamd_archive *arch;
533 struct rspamd_archive_file *f;
534 gint r;
535
536 p = part->parsed_data.begin;
537 end = p + part->parsed_data.len;
538
539 if ((gsize)(end - p) <= sizeof (rar_v5_magic)) {
540 msg_debug_archive ("rar archive is invalid (too small)");
541
542 return;
543 }
544
545 if (memcmp (p, rar_v5_magic, sizeof (rar_v5_magic)) == 0) {
546 p += sizeof (rar_v5_magic);
547 }
548 else if (memcmp (p, rar_v4_magic, sizeof (rar_v4_magic)) == 0) {
549 p += sizeof (rar_v4_magic);
550
551 rspamd_archive_process_rar_v4 (task, p, end, part);
552 return;
553 }
554 else {
555 msg_debug_archive ("rar archive is invalid (no rar magic)");
556
557 return;
558 }
559
560 /* Rar v5 format */
561 arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
562 arch->files = g_ptr_array_new ();
563 arch->type = RSPAMD_ARCHIVE_RAR;
564 rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
565 arch);
566
567 /* Now we can have either encryption header or archive header */
568 /* Crc 32 */
569 RAR_SKIP_BYTES (sizeof (guint32));
570 /* Size */
571 RAR_READ_VINT_SKIP ();
572 sz = vint;
573 /* Type */
574 section_start = p;
575 RAR_READ_VINT_SKIP ();
576 type = vint;
577 /* Header flags */
578 RAR_READ_VINT_SKIP ();
579 flags = vint;
580
581 if (flags & 0x1) {
582 /* Have extra zone */
583 RAR_READ_VINT_SKIP ();
584 }
585 if (flags & 0x2) {
586 /* Data zone is presented */
587 RAR_READ_VINT_SKIP ();
588 sz += vint;
589 }
590
591 if (type == rar_encrypted_header) {
592 /* We can't read any further information as archive is encrypted */
593 arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
594 goto end;
595 }
596 else if (type != rar_main_header) {
597 msg_debug_archive ("rar archive is invalid (bad main header)");
598
599 return;
600 }
601
602 /* Nothing useful in main header */
603 p = section_start;
604 RAR_SKIP_BYTES (sz);
605
606 while (p < end) {
607 gboolean has_extra = FALSE;
608 /* Read the next header */
609 /* Crc 32 */
610 RAR_SKIP_BYTES (sizeof (guint32));
611 /* Size */
612 RAR_READ_VINT_SKIP ();
613
614 sz = vint;
615 if (sz == 0) {
616 /* Zero sized block - error */
617 msg_debug_archive ("rar archive is invalid (zero size block)");
618
619 return;
620 }
621
622 section_start = p;
623 /* Type */
624 RAR_READ_VINT_SKIP ();
625 type = vint;
626 /* Header flags */
627 RAR_READ_VINT_SKIP ();
628 flags = vint;
629
630 if (flags & 0x1) {
631 /* Have extra zone */
632 RAR_READ_VINT_SKIP ();
633 extra_sz = vint;
634 has_extra = TRUE;
635 }
636
637 if (flags & 0x2) {
638 /* Data zone is presented */
639 RAR_READ_VINT_SKIP ();
640 sz += vint;
641 comp_sz = vint;
642 }
643
644 if (type != rar_file_header) {
645 p = section_start;
646 RAR_SKIP_BYTES (sz);
647 }
648 else {
649 /* We have a file header, go forward */
650 guint64 fname_len;
651
652 /* File header specific flags */
653 RAR_READ_VINT_SKIP ();
654 flags = vint;
655
656 /* Unpacked size */
657 RAR_READ_VINT_SKIP ();
658 uncomp_sz = vint;
659 /* Attributes */
660 RAR_READ_VINT_SKIP ();
661
662 if (flags & 0x2) {
663 /* Unix mtime */
664 RAR_SKIP_BYTES (sizeof (guint32));
665 }
666 if (flags & 0x4) {
667 /* Crc32 */
668 RAR_SKIP_BYTES (sizeof (guint32));
669 }
670
671 /* Compression */
672 RAR_READ_VINT_SKIP ();
673 /* Host OS */
674 RAR_READ_VINT_SKIP ();
675 /* Filename length (finally!) */
676 RAR_READ_VINT_SKIP ();
677 fname_len = vint;
678
679 if (fname_len == 0 || fname_len > (gsize)(end - p)) {
680 msg_debug_archive ("rar archive is invalid (bad filename size)");
681
682 return;
683 }
684
685 f = g_malloc0 (sizeof (*f));
686 f->uncompressed_size = uncomp_sz;
687 f->compressed_size = comp_sz;
688 f->fname = rspamd_archive_file_try_utf (task, p, fname_len);
689
690 if (f->fname) {
691 msg_debug_archive ("added rarv5 file: %v", f->fname);
692 g_ptr_array_add (arch->files, f);
693 }
694 else {
695 g_free (f);
696 f = NULL;
697 }
698
699 if (f && has_extra && extra_sz > 0 &&
700 p + fname_len + extra_sz < end) {
701 /* Try to find encryption record in extra field */
702 const guchar *ex = p + fname_len;
703
704 while (ex < p + extra_sz) {
705 const guchar *t;
706 gint64 cur_sz = 0, sec_type = 0;
707
708 r = rspamd_archive_rar_read_vint (ex, extra_sz, &cur_sz);
709 if (r == -1) {
710 msg_debug_archive ("rar archive is invalid (bad vint)");
711 return;
712 }
713
714 t = ex + r;
715
716 r = rspamd_archive_rar_read_vint (t, extra_sz - r, &sec_type);
717 if (r == -1) {
718 msg_debug_archive ("rar archive is invalid (bad vint)");
719 return;
720 }
721
722 if (sec_type == 0x01) {
723 f->flags |= RSPAMD_ARCHIVE_FILE_ENCRYPTED;
724 arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
725 break;
726 }
727
728 ex += cur_sz;
729 }
730 }
731
732 /* Restore p to the beginning of the header */
733 p = section_start;
734 RAR_SKIP_BYTES (sz);
735 }
736 }
737
738 end:
739 part->part_type = RSPAMD_MIME_PART_ARCHIVE;
740 part->specific.arch = arch;
741 if (part->cd != NULL) {
742 arch->archive_name = &part->cd->filename;
743 }
744 arch->size = part->parsed_data.len;
745 }
746
747 static inline gint
rspamd_archive_7zip_read_vint(const guchar * start,gsize remain,guint64 * res)748 rspamd_archive_7zip_read_vint (const guchar *start, gsize remain, guint64 *res)
749 {
750 /*
751 * REAL_UINT64 means real UINT64.
752 * UINT64 means real UINT64 encoded with the following scheme:
753 *
754 * Size of encoding sequence depends from first byte:
755 * First_Byte Extra_Bytes Value
756 * (binary)
757 * 0xxxxxxx : ( xxxxxxx )
758 * 10xxxxxx BYTE y[1] : ( xxxxxx << (8 * 1)) + y
759 * 110xxxxx BYTE y[2] : ( xxxxx << (8 * 2)) + y
760 * ...
761 * 1111110x BYTE y[6] : ( x << (8 * 6)) + y
762 * 11111110 BYTE y[7] : y
763 * 11111111 BYTE y[8] : y
764 */
765 guchar t;
766
767 if (remain == 0) {
768 return -1;
769 }
770
771 t = *start;
772
773 if (!isset (&t, 7)) {
774 /* Trivial case */
775 *res = t;
776 return 1;
777 }
778 else if (t == 0xFF) {
779 if (remain >= sizeof (guint64) + 1) {
780 memcpy (res, start + 1, sizeof (guint64));
781 *res = GUINT64_FROM_LE (*res);
782
783 return sizeof (guint64) + 1;
784 }
785 }
786 else {
787 gint cur_bit = 6, intlen = 1;
788 const guchar bmask = 0xFF;
789 guint64 tgt;
790
791 while (cur_bit > 0) {
792 if (!isset (&t, cur_bit)) {
793 if (remain >= intlen + 1) {
794 memcpy (&tgt, start + 1, intlen);
795 tgt = GUINT64_FROM_LE (tgt);
796 /* Shift back */
797 tgt >>= sizeof (tgt) - NBBY * intlen;
798 /* Add masked value */
799 tgt += (guint64)(t & (bmask >> (NBBY - cur_bit)))
800 << (NBBY * intlen);
801 *res = tgt;
802
803 return intlen + 1;
804 }
805 }
806 cur_bit --;
807 intlen ++;
808 }
809 }
810
811 return -1;
812 }
813
/*
 * Cursor helpers for the 7zip parsers. They work on the variables `p`
 * (read position), `end` (upper bound of the data) and `task` (logging) of
 * the enclosing function and leave that function on malformed input:
 * SZ_READ_VINT_SKIP and SZ_READ_UINT64 with a plain `return`,
 * SZ_READ_VINT and SZ_SKIP_BYTES with `return NULL`.
 */

/* Decode a 7zip number into the caller's `vint` (length in `r`), advance p */
#define SZ_READ_VINT_SKIP() do { \
	r = rspamd_archive_7zip_read_vint (p, end - p, &vint); \
	if (r == -1) { \
		msg_debug_archive ("7z archive is invalid (bad vint)"); \
		return; \
	} \
	p += r; \
} while (0)

/* Decode a 7zip number into var and advance p */
#define SZ_READ_VINT(var) do { \
	int r; \
	r = rspamd_archive_7zip_read_vint (p, end - p, &(var)); \
	if (r == -1) { \
		msg_debug_archive ("7z archive is invalid (bad vint): %s", G_STRLOC); \
		return NULL; \
	} \
	p += r; \
} while (0)

/* Read a fixed size little endian 64 bit value into n and advance p */
#define SZ_READ_UINT64(n) do { \
	if (end - p < (goffset)sizeof (guint64)) { \
		msg_debug_archive ("7zip archive is invalid (bad uint64): %s", G_STRLOC); \
		return; \
	} \
	memcpy (&(n), p, sizeof (guint64)); \
	n = GUINT64_FROM_LE(n); \
	p += sizeof (guint64); \
} while (0)

/* Advance p by n bytes if that many bytes are available */
#define SZ_SKIP_BYTES(n) do { \
	if (end - p < (n)) { \
		msg_debug_archive ("7zip archive is invalid (truncated); wanted to read %d bytes, %d avail: %s", (gint)(n), (gint)(end - p), G_STRLOC); \
		return NULL; \
	} \
	p += (n); \
} while (0)
850
/*
 * One byte property identifiers that introduce the sections of a 7zip
 * header. The parsers below switch on these values.
 */
enum rspamd_7zip_header_mark {
	/* Section terminator and top level containers */
	kEnd = 0x00,
	kHeader = 0x01,
	kArchiveProperties = 0x02,
	kAdditionalStreamsInfo = 0x03,
	kMainStreamsInfo = 0x04,
	kFilesInfo = 0x05,
	/* Stream description sections */
	kPackInfo = 0x06,
	kUnPackInfo = 0x07,
	kSubStreamsInfo = 0x08,
	kSize = 0x09,
	kCRC = 0x0A,
	kFolder = 0x0B,
	kCodersUnPackSize = 0x0C,
	kNumUnPackStream = 0x0D,
	/* Per file properties */
	kEmptyStream = 0x0E,
	kEmptyFile = 0x0F,
	kAnti = 0x10,
	kName = 0x11,
	kCTime = 0x12,
	kATime = 0x13,
	kMTime = 0x14,
	kWinAttributes = 0x15,
	kComment = 0x16,
	/* Miscellaneous */
	kEncodedHeader = 0x17,
	kStartPos = 0x18,
	kDummy = 0x19,
};
879
880
/* Codec ids of the 7zip coders that encrypt data */
#define _7Z_CRYPTO_MAIN_ZIP 0x06F10101 /* Main Zip crypto algo */
#define _7Z_CRYPTO_RAR_29 0x06F10303 /* Rar29 AES-128 + (modified SHA-1) */
#define _7Z_CRYPTO_AES_256_SHA_256 0x06F10701 /* AES-256 + SHA-256 */

/* True if codec_id is one of the encryption codec ids listed above */
#define IS_SZ_ENCRYPTED(codec_id) \
	(((codec_id) == _7Z_CRYPTO_MAIN_ZIP) || \
	 ((codec_id) == _7Z_CRYPTO_RAR_29) || \
	 ((codec_id) == _7Z_CRYPTO_AES_256_SHA_256))
888
889 static const guchar *
rspamd_7zip_read_bits(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch,guint nbits,guint * pbits_set)890 rspamd_7zip_read_bits (struct rspamd_task *task,
891 const guchar *p, const guchar *end,
892 struct rspamd_archive *arch, guint nbits,
893 guint *pbits_set)
894 {
895 unsigned mask = 0, avail = 0, i;
896 gboolean bit_set = 0;
897
898 for (i = 0; i < nbits; i++) {
899 if (mask == 0) {
900 avail = *p;
901 SZ_SKIP_BYTES(1);
902 mask = 0x80;
903 }
904
905 bit_set = (avail & mask) ? 1 : 0;
906
907 if (bit_set && pbits_set) {
908 (*pbits_set) ++;
909 }
910
911 mask >>= 1;
912 }
913
914 return p;
915 }
916
917 static const guchar *
rspamd_7zip_read_digest(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch,guint64 num_streams,guint * pdigest_read)918 rspamd_7zip_read_digest (struct rspamd_task *task,
919 const guchar *p, const guchar *end,
920 struct rspamd_archive *arch,
921 guint64 num_streams,
922 guint *pdigest_read)
923 {
924 guchar all_defined = *p;
925 guint64 i;
926 guint num_defined = 0;
927 /*
928 * BYTE AllAreDefined
929 * if (AllAreDefined == 0)
930 * {
931 * for(NumStreams)
932 * BIT Defined
933 * }
934 * UINT32 CRCs[NumDefined]
935 */
936 SZ_SKIP_BYTES(1);
937
938 if (all_defined) {
939 num_defined = num_streams;
940 }
941 else {
942 if (num_streams > 8192) {
943 /* Gah */
944 return NULL;
945 }
946
947 p = rspamd_7zip_read_bits (task, p, end, arch, num_streams, &num_defined);
948
949 if (p == NULL) {
950 return NULL;
951 }
952 }
953
954 for (i = 0; i < num_defined; i ++) {
955 SZ_SKIP_BYTES(sizeof(guint32));
956 }
957
958 if (pdigest_read) {
959 *pdigest_read = num_defined;
960 }
961
962 return p;
963 }
964
965 static const guchar *
rspamd_7zip_read_pack_info(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch)966 rspamd_7zip_read_pack_info (struct rspamd_task *task,
967 const guchar *p, const guchar *end,
968 struct rspamd_archive *arch)
969 {
970 guint64 pack_pos = 0, pack_streams = 0, i, cur_sz;
971 guint num_digests = 0;
972 guchar t;
973 /*
974 * UINT64 PackPos
975 * UINT64 NumPackStreams
976 *
977 * []
978 * BYTE NID::kSize (0x09)
979 * UINT64 PackSizes[NumPackStreams]
980 * []
981 *
982 * []
983 * BYTE NID::kCRC (0x0A)
984 * PackStreamDigests[NumPackStreams]
985 * []
986 * BYTE NID::kEnd
987 */
988
989 SZ_READ_VINT(pack_pos);
990 SZ_READ_VINT(pack_streams);
991
992 while (p != NULL && p < end) {
993 t = *p;
994 SZ_SKIP_BYTES(1);
995 msg_debug_archive ("7zip: read pack info %xc", t);
996
997 switch (t) {
998 case kSize:
999 /* We need to skip pack_streams VINTS */
1000 for (i = 0; i < pack_streams; i++) {
1001 SZ_READ_VINT(cur_sz);
1002 }
1003 break;
1004 case kCRC:
1005 /* CRCs are more complicated */
1006 p = rspamd_7zip_read_digest (task, p, end, arch, pack_streams,
1007 &num_digests);
1008 break;
1009 case kEnd:
1010 goto end;
1011 break;
1012 default:
1013 p = NULL;
1014 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1015 goto end;
1016 break;
1017 }
1018 }
1019
1020 end:
1021
1022 return p;
1023 }
1024
1025 static const guchar *
rspamd_7zip_read_folder(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch,guint * pnstreams,guint * ndigests)1026 rspamd_7zip_read_folder (struct rspamd_task *task,
1027 const guchar *p, const guchar *end,
1028 struct rspamd_archive *arch, guint *pnstreams, guint *ndigests)
1029 {
1030 guint64 ncoders = 0, i, j, noutstreams = 0, ninstreams = 0;
1031
1032 SZ_READ_VINT (ncoders);
1033
1034 for (i = 0; i < ncoders && p != NULL && p < end; i ++) {
1035 guint64 sz, tmp;
1036 guchar t;
1037 /*
1038 * BYTE
1039 * {
1040 * 0:3 CodecIdSize
1041 * 4: Is Complex Coder
1042 * 5: There Are Attributes
1043 * 6: Reserved
1044 * 7: There are more alternative methods. (Not used anymore, must be 0).
1045 * }
1046 * BYTE CodecId[CodecIdSize]
1047 * if (Is Complex Coder)
1048 * {
1049 * UINT64 NumInStreams;
1050 * UINT64 NumOutStreams;
1051 * }
1052 * if (There Are Attributes)
1053 * {
1054 * UINT64 PropertiesSize
1055 * BYTE Properties[PropertiesSize]
1056 * }
1057 */
1058 t = *p;
1059 SZ_SKIP_BYTES (1);
1060 sz = t & 0xF;
1061 /* Codec ID */
1062 tmp = 0;
1063 for (j = 0; j < sz; j++) {
1064 tmp <<= 8;
1065 tmp += p[j];
1066 }
1067
1068 msg_debug_archive ("7zip: read codec id: %L", tmp);
1069
1070 if (IS_SZ_ENCRYPTED (tmp)) {
1071 arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
1072 }
1073
1074 SZ_SKIP_BYTES (sz);
1075
1076 if (t & (1u << 4)) {
1077 /* Complex */
1078 SZ_READ_VINT (tmp); /* InStreams */
1079 ninstreams += tmp;
1080 SZ_READ_VINT (tmp); /* OutStreams */
1081 noutstreams += tmp;
1082 }
1083 else {
1084 /* XXX: is it correct ? */
1085 noutstreams ++;
1086 ninstreams ++;
1087 }
1088 if (t & (1u << 5)) {
1089 /* Attributes ... */
1090 SZ_READ_VINT (tmp); /* Size of attrs */
1091 SZ_SKIP_BYTES (tmp);
1092 }
1093 }
1094
1095 if (noutstreams > 1) {
1096 /* BindPairs, WTF, huh */
1097 for (i = 0; i < noutstreams - 1; i ++) {
1098 guint64 tmp;
1099
1100 SZ_READ_VINT (tmp);
1101 SZ_READ_VINT (tmp);
1102 }
1103 }
1104
1105 gint64 npacked = (gint64)ninstreams - (gint64)noutstreams + 1;
1106 msg_debug_archive ("7zip: instreams=%L, outstreams=%L, packed=%L",
1107 ninstreams, noutstreams, npacked);
1108
1109 if (npacked > 1) {
1110 /* Gah... */
1111 for (i = 0; i < npacked; i ++) {
1112 guint64 tmp;
1113
1114 SZ_READ_VINT (tmp);
1115 }
1116 }
1117
1118 *pnstreams = noutstreams;
1119 (*ndigests) += npacked;
1120
1121 return p;
1122 }
1123
1124 static const guchar *
rspamd_7zip_read_coders_info(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch,guint * pnum_folders,guint * pnum_nodigest)1125 rspamd_7zip_read_coders_info (struct rspamd_task *task,
1126 const guchar *p, const guchar *end,
1127 struct rspamd_archive *arch,
1128 guint *pnum_folders, guint *pnum_nodigest)
1129 {
1130 guint64 num_folders = 0, i, tmp;
1131 guchar t;
1132 guint *folder_nstreams = NULL, num_digests = 0, digests_read = 0;
1133
1134 while (p != NULL && p < end) {
1135 /*
1136 * BYTE NID::kFolder (0x0B)
1137 * UINT64 NumFolders
1138 * BYTE External
1139 * switch(External)
1140 * {
1141 * case 0:
1142 * Folders[NumFolders]
1143 * case 1:
1144 * UINT64 DataStreamIndex
1145 * }
1146 * BYTE ID::kCodersUnPackSize (0x0C)
1147 * for(Folders)
1148 * for(Folder.NumOutStreams)
1149 * UINT64 UnPackSize;
1150 * []
1151 * BYTE NID::kCRC (0x0A)
1152 * UnPackDigests[NumFolders]
1153 * []
1154 * BYTE NID::kEnd
1155 */
1156
1157 t = *p;
1158 SZ_SKIP_BYTES(1);
1159 msg_debug_archive ("7zip: read coders info %xc", t);
1160
1161 switch (t) {
1162 case kFolder:
1163 SZ_READ_VINT (num_folders);
1164 msg_debug_archive ("7zip: nfolders=%L", num_folders);
1165
1166 if (*p != 0) {
1167 /* External folders */
1168 SZ_SKIP_BYTES(1);
1169 SZ_READ_VINT (tmp);
1170 }
1171 else {
1172 SZ_SKIP_BYTES(1);
1173
1174 if (num_folders > 8192) {
1175 /* Gah */
1176 return NULL;
1177 }
1178
1179 if (folder_nstreams) {
1180 g_free (folder_nstreams);
1181 }
1182
1183 folder_nstreams = g_malloc (sizeof (int) * num_folders);
1184
1185 for (i = 0; i < num_folders && p != NULL && p < end; i++) {
1186 p = rspamd_7zip_read_folder (task, p, end, arch,
1187 &folder_nstreams[i], &num_digests);
1188 }
1189 }
1190 break;
1191 case kCodersUnPackSize:
1192 for (i = 0; i < num_folders && p != NULL && p < end; i++) {
1193 if (folder_nstreams) {
1194 for (guint j = 0; j < folder_nstreams[i]; j++) {
1195 SZ_READ_VINT (tmp); /* Unpacked size */
1196 msg_debug_archive ("7zip: unpacked size "
1197 "(folder=%d, stream=%d) = %L",
1198 (gint)i, j, tmp);
1199 }
1200 }
1201 else {
1202 msg_err_task ("internal 7zip error");
1203 }
1204 }
1205 break;
1206 case kCRC:
1207 /*
1208 * Here are dragons. Spec tells that here there could be up
1209 * to nfolders digests. However, according to the actual source
1210 * code, in case of multiple out streams there should be digests
1211 * for all out streams.
1212 *
1213 * In the real life (tm) it is even more idiotic: all these digests
1214 * are in another section! But that section needs number of digests
1215 * that are absent here. It is the most stupid thing I've ever seen
1216 * in any file format.
1217 *
1218 * I hope there *WAS* some reason to do such shit...
1219 */
1220 p = rspamd_7zip_read_digest (task, p, end, arch, num_digests,
1221 &digests_read);
1222 break;
1223 case kEnd:
1224 goto end;
1225 break;
1226 default:
1227 p = NULL;
1228 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1229 goto end;
1230 break;
1231 }
1232 }
1233
1234 end:
1235
1236 if (pnum_nodigest) {
1237 *pnum_nodigest = num_digests - digests_read;
1238 }
1239 if (pnum_folders) {
1240 *pnum_folders = num_folders;
1241 }
1242
1243 if (folder_nstreams) {
1244 g_free (folder_nstreams);
1245 }
1246
1247 return p;
1248 }
1249
1250 static const guchar *
rspamd_7zip_read_substreams_info(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch,guint num_folders,guint num_nodigest)1251 rspamd_7zip_read_substreams_info (struct rspamd_task *task,
1252 const guchar *p, const guchar *end,
1253 struct rspamd_archive *arch,
1254 guint num_folders, guint num_nodigest)
1255 {
1256 guchar t;
1257 guint i;
1258 guint64 *folder_nstreams;
1259
1260 if (num_folders > 8192) {
1261 /* Gah */
1262 return NULL;
1263 }
1264
1265 folder_nstreams = g_alloca (sizeof (guint64) * num_folders);
1266 memset (folder_nstreams, 0, sizeof (guint64) * num_folders);
1267
1268 while (p != NULL && p < end) {
1269 /*
1270 * []
1271 * BYTE NID::kNumUnPackStream; (0x0D)
1272 * UINT64 NumUnPackStreamsInFolders[NumFolders];
1273 * []
1274 *
1275 * []
1276 * BYTE NID::kSize (0x09)
1277 * UINT64 UnPackSizes[??]
1278 * []
1279 *
1280 *
1281 * []
1282 * BYTE NID::kCRC (0x0A)
1283 * Digests[Number of streams with unknown CRC]
1284 * []
1285
1286 */
1287 t = *p;
1288 SZ_SKIP_BYTES(1);
1289
1290 msg_debug_archive ("7zip: read substream info %xc", t);
1291
1292 switch (t) {
1293 case kNumUnPackStream:
1294 for (i = 0; i < num_folders; i ++) {
1295 guint64 tmp;
1296
1297 SZ_READ_VINT (tmp);
1298 folder_nstreams[i] = tmp;
1299 }
1300 break;
1301 case kCRC:
1302 /*
1303 * Read the comment in the rspamd_7zip_read_coders_info
1304 */
1305 p = rspamd_7zip_read_digest (task, p, end, arch, num_nodigest,
1306 NULL);
1307 break;
1308 case kSize:
1309 /*
1310 * Another brain damaged logic, but we have to support it
1311 * as there are no ways to proceed without it.
1312 * In fact, it is just absent in the real life...
1313 */
1314 for (i = 0; i < num_folders; i ++) {
1315 for (guint j = 0; j < folder_nstreams[i]; j++) {
1316 guint64 tmp;
1317
1318 SZ_READ_VINT (tmp); /* Who cares indeed */
1319 }
1320 }
1321 break;
1322 case kEnd:
1323 goto end;
1324 break;
1325 default:
1326 p = NULL;
1327 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1328 goto end;
1329 break;
1330 }
1331 }
1332
1333 end:
1334 return p;
1335 }
1336
1337 static const guchar *
rspamd_7zip_read_main_streams_info(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch)1338 rspamd_7zip_read_main_streams_info (struct rspamd_task *task,
1339 const guchar *p, const guchar *end,
1340 struct rspamd_archive *arch)
1341 {
1342 guchar t;
1343 guint num_folders = 0, unknown_digests = 0;
1344
1345 while (p != NULL && p < end) {
1346 t = *p;
1347 SZ_SKIP_BYTES(1);
1348 msg_debug_archive ("7zip: read main streams info %xc", t);
1349
1350 /*
1351 *
1352 * []
1353 * PackInfo
1354 * []
1355
1356 * []
1357 * CodersInfo
1358 * []
1359 *
1360 * []
1361 * SubStreamsInfo
1362 * []
1363 *
1364 * BYTE NID::kEnd
1365 */
1366 switch (t) {
1367 case kPackInfo:
1368 p = rspamd_7zip_read_pack_info (task, p, end, arch);
1369 break;
1370 case kUnPackInfo:
1371 p = rspamd_7zip_read_coders_info (task, p, end, arch, &num_folders,
1372 &unknown_digests);
1373 break;
1374 case kSubStreamsInfo:
1375 p = rspamd_7zip_read_substreams_info (task, p, end, arch, num_folders,
1376 unknown_digests);
1377 break;
1378 break;
1379 case kEnd:
1380 goto end;
1381 break;
1382 default:
1383 p = NULL;
1384 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1385 goto end;
1386 break;
1387 }
1388 }
1389
1390 end:
1391 return p;
1392 }
1393
1394 static const guchar *
rspamd_7zip_read_archive_props(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch)1395 rspamd_7zip_read_archive_props (struct rspamd_task *task,
1396 const guchar *p, const guchar *end,
1397 struct rspamd_archive *arch)
1398 {
1399 guchar proptype;
1400 guint64 proplen;
1401
1402 /*
1403 * for (;;)
1404 * {
1405 * BYTE PropertyType;
1406 * if (aType == 0)
1407 * break;
1408 * UINT64 PropertySize;
1409 * BYTE PropertyData[PropertySize];
1410 * }
1411 */
1412
1413 if (p != NULL) {
1414 proptype = *p;
1415 SZ_SKIP_BYTES(1);
1416
1417 while (proptype != 0) {
1418 SZ_READ_VINT(proplen);
1419
1420 if (p + proplen < end) {
1421 p += proplen;
1422 }
1423 else {
1424 return NULL;
1425 }
1426
1427 proptype = *p;
1428 SZ_SKIP_BYTES(1);
1429 }
1430 }
1431
1432 return p;
1433 }
1434
1435 static GString *
rspamd_7zip_ucs2_to_utf8(struct rspamd_task * task,const guchar * p,const guchar * end)1436 rspamd_7zip_ucs2_to_utf8 (struct rspamd_task *task, const guchar *p,
1437 const guchar *end)
1438 {
1439 GString *res;
1440 goffset dest_pos = 0, src_pos = 0;
1441 const gsize len = (end - p) / sizeof (guint16);
1442 guint16 *up;
1443 UChar32 wc;
1444 UBool is_error = 0;
1445
1446 res = g_string_sized_new ((end - p) * 3 / 2 + sizeof (wc) + 1);
1447 up = (guint16 *)p;
1448
1449 while (src_pos < len) {
1450 U16_NEXT (up, src_pos, len, wc);
1451
1452 if (wc > 0) {
1453 U8_APPEND (res->str, dest_pos,
1454 res->allocated_len - 1,
1455 wc, is_error);
1456 }
1457
1458 if (is_error) {
1459 g_string_free (res, TRUE);
1460
1461 return NULL;
1462 }
1463 }
1464
1465 g_assert (dest_pos < res->allocated_len);
1466
1467 res->len = dest_pos;
1468 res->str[dest_pos] = '\0';
1469
1470 return res;
1471 }
1472
1473 static const guchar *
rspamd_7zip_read_files_info(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch)1474 rspamd_7zip_read_files_info (struct rspamd_task *task,
1475 const guchar *p, const guchar *end,
1476 struct rspamd_archive *arch)
1477 {
1478 guint64 nfiles = 0, sz, i;
1479 guchar t, b;
1480 struct rspamd_archive_file *fentry;
1481
1482 SZ_READ_VINT (nfiles);
1483
1484 for (;p != NULL && p < end;) {
1485 t = *p;
1486 SZ_SKIP_BYTES (1);
1487
1488 msg_debug_archive ("7zip: read file data type %xc", t);
1489
1490 if (t == kEnd) {
1491 goto end;
1492 }
1493
1494 /* This is SO SPECIAL, gah */
1495 SZ_READ_VINT (sz);
1496
1497 switch (t) {
1498 case kEmptyStream:
1499 case kEmptyFile:
1500 case kAnti: /* AntiFile, OMFG */
1501 /* We don't care about these bits */
1502 case kCTime:
1503 case kATime:
1504 case kMTime:
1505 /* We don't care of these guys, but we still have to parse them, gah */
1506 if (sz > 0) {
1507 SZ_SKIP_BYTES (sz);
1508 }
1509 break;
1510 case kName:
1511 /* The most useful part in this whole bloody format */
1512 b = *p; /* External flag */
1513 SZ_SKIP_BYTES (1);
1514
1515 if (b) {
1516 /* TODO: for the god sake, do something about external
1517 * filenames...
1518 */
1519 guint64 tmp;
1520
1521 SZ_READ_VINT (tmp);
1522 }
1523 else {
1524 for (i = 0; i < nfiles; i ++) {
1525 /* Zero terminated wchar_t: happy converting... */
1526 /* First, find terminator */
1527 const guchar *fend = NULL, *tp = p;
1528 GString *res;
1529
1530 while (tp < end - 1) {
1531 if (*tp == 0 && *(tp + 1) == 0) {
1532 fend = tp;
1533 break;
1534 }
1535
1536 tp += 2;
1537 }
1538
1539 if (fend == NULL || fend - p == 0) {
1540 /* Crap instead of fname */
1541 msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
1542 goto end;
1543 }
1544
1545 res = rspamd_7zip_ucs2_to_utf8 (task, p, fend);
1546
1547 if (res != NULL) {
1548 fentry = g_malloc0 (sizeof (*fentry));
1549 fentry->fname = res;
1550 g_ptr_array_add (arch->files, fentry);
1551 msg_debug_archive ("7zip: found file %v", res);
1552 }
1553 else {
1554 msg_debug_archive ("bad 7zip name; %s", G_STRLOC);
1555 }
1556 /* Skip zero terminating character */
1557 p = fend + 2;
1558 }
1559 }
1560 break;
1561 case kDummy:
1562 case kWinAttributes:
1563 if (sz > 0) {
1564 SZ_SKIP_BYTES (sz);
1565 }
1566 break;
1567 default:
1568 p = NULL;
1569 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1570 goto end;
1571 break;
1572 }
1573 }
1574
1575 end:
1576 return p;
1577 }
1578
1579 static const guchar *
rspamd_7zip_read_next_section(struct rspamd_task * task,const guchar * p,const guchar * end,struct rspamd_archive * arch)1580 rspamd_7zip_read_next_section (struct rspamd_task *task,
1581 const guchar *p, const guchar *end,
1582 struct rspamd_archive *arch)
1583 {
1584 guchar t = *p;
1585
1586 SZ_SKIP_BYTES(1);
1587
1588 msg_debug_archive ("7zip: read section %xc", t);
1589
1590 switch (t) {
1591 case kHeader:
1592 /* We just skip byte and go further */
1593 break;
1594 case kEncodedHeader:
1595 /*
1596 * In fact, headers are just packed, but we assume it as
1597 * encrypted to distinguish from the normal archives
1598 */
1599 msg_debug_archive ("7zip: encoded header, needs to be uncompressed");
1600 arch->flags |= RSPAMD_ARCHIVE_CANNOT_READ;
1601 p = NULL; /* Cannot get anything useful */
1602 break;
1603 case kArchiveProperties:
1604 p = rspamd_7zip_read_archive_props (task, p, end, arch);
1605 break;
1606 case kMainStreamsInfo:
1607 p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
1608 break;
1609 case kAdditionalStreamsInfo:
1610 p = rspamd_7zip_read_main_streams_info (task, p, end, arch);
1611 break;
1612 case kFilesInfo:
1613 p = rspamd_7zip_read_files_info (task, p, end, arch);
1614 break;
1615 case kEnd:
1616 p = NULL;
1617 msg_debug_archive ("7zip: read final section");
1618 break;
1619 default:
1620 p = NULL;
1621 msg_debug_archive ("bad 7zip type: %xc; %s", t, G_STRLOC);
1622 break;
1623 }
1624
1625 return p;
1626 }
1627
1628 static void
rspamd_archive_process_7zip(struct rspamd_task * task,struct rspamd_mime_part * part)1629 rspamd_archive_process_7zip (struct rspamd_task *task,
1630 struct rspamd_mime_part *part)
1631 {
1632 struct rspamd_archive *arch;
1633 const guchar *start, *p, *end;
1634 const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
1635 guint64 section_offset = 0, section_length = 0;
1636
1637 start = part->parsed_data.begin;
1638 p = start;
1639 end = p + part->parsed_data.len;
1640
1641 if (end - p <= sizeof (guint64) + sizeof (guint32) ||
1642 memcmp (p, sz_magic, sizeof (sz_magic)) != 0) {
1643 msg_debug_archive ("7z archive is invalid (no 7z magic)");
1644
1645 return;
1646 }
1647
1648 arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
1649 arch->files = g_ptr_array_new ();
1650 arch->type = RSPAMD_ARCHIVE_7ZIP;
1651 rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
1652 arch);
1653
1654 /* Magic (6 bytes) + version (2 bytes) + crc32 (4 bytes) */
1655 p += sizeof (guint64) + sizeof (guint32);
1656
1657 SZ_READ_UINT64(section_offset);
1658 SZ_READ_UINT64(section_length);
1659
1660 if (end - p > sizeof (guint32)) {
1661 p += sizeof (guint32);
1662 }
1663 else {
1664 msg_debug_archive ("7z archive is invalid (truncated crc)");
1665
1666 return;
1667 }
1668
1669 if (end - p > section_offset) {
1670 p += section_offset;
1671 }
1672 else {
1673 msg_debug_archive ("7z archive is invalid (incorrect section offset)");
1674
1675 return;
1676 }
1677
1678 while ((p = rspamd_7zip_read_next_section (task, p, end, arch)) != NULL);
1679
1680 part->part_type = RSPAMD_MIME_PART_ARCHIVE;
1681 part->specific.arch = arch;
1682 if (part->cd != NULL) {
1683 arch->archive_name = &part->cd->filename;
1684 }
1685 arch->size = part->parsed_data.len;
1686 }
1687
1688 static void
rspamd_archive_process_gzip(struct rspamd_task * task,struct rspamd_mime_part * part)1689 rspamd_archive_process_gzip (struct rspamd_task *task,
1690 struct rspamd_mime_part *part) {
1691 struct rspamd_archive *arch;
1692 const guchar *start, *p, *end;
1693 const guchar gz_magic[] = {0x1F, 0x8B};
1694 guchar flags;
1695
1696 start = part->parsed_data.begin;
1697 p = start;
1698 end = p + part->parsed_data.len;
1699
1700 if (end - p <= 10 || memcmp (p, gz_magic, sizeof (gz_magic)) != 0) {
1701 msg_debug_archive ("gzip archive is invalid (no gzip magic)");
1702
1703 return;
1704 }
1705
1706 arch = rspamd_mempool_alloc0 (task->task_pool, sizeof (*arch));
1707 arch->files = g_ptr_array_sized_new (1);
1708 arch->type = RSPAMD_ARCHIVE_GZIP;
1709 rspamd_mempool_add_destructor (task->task_pool, rspamd_archive_dtor,
1710 arch);
1711
1712 flags = p[3];
1713
1714 if (flags & (1u << 5)) {
1715 arch->flags |= RSPAMD_ARCHIVE_ENCRYPTED;
1716 }
1717
1718 if (flags & (1u << 3)) {
1719 /* We have file name presented in archive, try to use it */
1720 if (flags & (1u << 1)) {
1721 /* Multipart */
1722 p += 12;
1723 }
1724 else {
1725 p += 10;
1726 }
1727
1728 if (flags & (1u << 2)) {
1729 /* Optional section */
1730 guint16 optlen = 0;
1731
1732 RAR_READ_UINT16 (optlen);
1733
1734 if (end <= p + optlen) {
1735 msg_debug_archive ("gzip archive is invalid, bad extra length: %d",
1736 (int)optlen);
1737
1738 return;
1739 }
1740
1741 p += optlen;
1742 }
1743
1744 /* Read file name */
1745 const guchar *fname_start = p;
1746
1747 while (p < end) {
1748 if (*p == '\0') {
1749 if (p > fname_start) {
1750 struct rspamd_archive_file *f;
1751
1752 f = g_malloc0 (sizeof (*f));
1753 f->fname = rspamd_archive_file_try_utf (task, fname_start,
1754 p - fname_start);
1755
1756 if (f->fname) {
1757 g_ptr_array_add (arch->files, f);
1758 }
1759 else {
1760 /* Invalid filename, skip */
1761 g_free (f);
1762 }
1763
1764 goto set;
1765 }
1766 }
1767
1768 p ++;
1769 }
1770
1771 /* Wrong filename, not zero terminated */
1772 msg_debug_archive ("gzip archive is invalid, bad filename at pos %d",
1773 (int)(p - start));
1774
1775 return;
1776 }
1777
1778 /* Fallback, we need to extract file name from archive name if possible */
1779 if (part->cd && part->cd->filename.len > 0) {
1780 const gchar *dot_pos, *slash_pos;
1781
1782 dot_pos = rspamd_memrchr (part->cd->filename.begin, '.',
1783 part->cd->filename.len);
1784
1785 if (dot_pos) {
1786 struct rspamd_archive_file *f;
1787
1788 slash_pos = rspamd_memrchr (part->cd->filename.begin, '/',
1789 part->cd->filename.len);
1790
1791 if (slash_pos && slash_pos < dot_pos) {
1792 f = g_malloc0 (sizeof (*f));
1793 f->fname = g_string_sized_new (dot_pos - slash_pos);
1794 g_string_append_len (f->fname, slash_pos + 1,
1795 dot_pos - slash_pos - 1);
1796
1797 msg_debug_archive ("fallback to gzip filename based on cd: %v",
1798 f->fname);
1799
1800 g_ptr_array_add (arch->files, f);
1801
1802 goto set;
1803 }
1804 else {
1805 const gchar *fname_start = part->cd->filename.begin;
1806
1807 f = g_malloc0 (sizeof (*f));
1808
1809 if (memchr (fname_start, '.', part->cd->filename.len) != dot_pos) {
1810 /* Double dots, something like foo.exe.gz */
1811 f->fname = g_string_sized_new (dot_pos - fname_start);
1812 g_string_append_len (f->fname, fname_start,
1813 dot_pos - fname_start);
1814 }
1815 else {
1816 /* Single dot, something like foo.gzz */
1817 f->fname = g_string_sized_new (part->cd->filename.len);
1818 g_string_append_len (f->fname, fname_start,
1819 part->cd->filename.len);
1820 }
1821
1822 msg_debug_archive ("fallback to gzip filename based on cd: %v",
1823 f->fname);
1824
1825 g_ptr_array_add (arch->files, f);
1826
1827 goto set;
1828 }
1829 }
1830 }
1831
1832 return;
1833
1834 set:
1835 /* Set archive data */
1836 part->part_type = RSPAMD_MIME_PART_ARCHIVE;
1837 part->specific.arch = arch;
1838
1839 if (part->cd) {
1840 arch->archive_name = &part->cd->filename;
1841 }
1842
1843 arch->size = part->parsed_data.len;
1844 }
1845
1846 static gboolean
rspamd_archive_cheat_detect(struct rspamd_mime_part * part,const gchar * str,const guchar * magic_start,gsize magic_len)1847 rspamd_archive_cheat_detect (struct rspamd_mime_part *part, const gchar *str,
1848 const guchar *magic_start, gsize magic_len)
1849 {
1850 struct rspamd_content_type *ct;
1851 const gchar *p;
1852 rspamd_ftok_t srch, *fname;
1853
1854 ct = part->ct;
1855 RSPAMD_FTOK_ASSIGN (&srch, "application");
1856
1857 if (ct && ct->type.len && ct->subtype.len > 0 && rspamd_ftok_cmp (&ct->type,
1858 &srch) == 0) {
1859 if (rspamd_substring_search_caseless (ct->subtype.begin, ct->subtype.len,
1860 str, strlen (str)) != -1) {
1861 /* We still need to check magic, see #1848 */
1862 if (magic_start != NULL) {
1863 if (part->parsed_data.len > magic_len &&
1864 memcmp (part->parsed_data.begin,
1865 magic_start, magic_len) == 0) {
1866 return TRUE;
1867 }
1868 /* No magic, refuse this type of archive */
1869 return FALSE;
1870 }
1871 else {
1872 return TRUE;
1873 }
1874 }
1875 }
1876
1877 if (part->cd) {
1878 fname = &part->cd->filename;
1879
1880 if (fname && fname->len > strlen (str)) {
1881 p = fname->begin + fname->len - strlen (str);
1882
1883 if (rspamd_lc_cmp (p, str, strlen (str)) == 0) {
1884 if (*(p - 1) == '.') {
1885 if (magic_start != NULL) {
1886 if (part->parsed_data.len > magic_len &&
1887 memcmp (part->parsed_data.begin,
1888 magic_start, magic_len) == 0) {
1889 return TRUE;
1890 }
1891 /* No magic, refuse this type of archive */
1892 return FALSE;
1893 }
1894
1895 return TRUE;
1896 }
1897 }
1898 }
1899
1900 if (magic_start != NULL) {
1901 if (part->parsed_data.len > magic_len &&
1902 memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
1903 return TRUE;
1904 }
1905 }
1906 }
1907 else {
1908 if (magic_start != NULL) {
1909 if (part->parsed_data.len > magic_len &&
1910 memcmp (part->parsed_data.begin, magic_start, magic_len) == 0) {
1911 return TRUE;
1912 }
1913 }
1914 }
1915
1916 return FALSE;
1917 }
1918
1919 void
rspamd_archives_process(struct rspamd_task * task)1920 rspamd_archives_process (struct rspamd_task *task)
1921 {
1922 guint i;
1923 struct rspamd_mime_part *part;
1924 const guchar rar_magic[] = {0x52, 0x61, 0x72, 0x21, 0x1A, 0x07};
1925 const guchar zip_magic[] = {0x50, 0x4b, 0x03, 0x04};
1926 const guchar sz_magic[] = {'7', 'z', 0xBC, 0xAF, 0x27, 0x1C};
1927 const guchar gz_magic[] = {0x1F, 0x8B, 0x08};
1928
1929 PTR_ARRAY_FOREACH (MESSAGE_FIELD (task, parts), i, part) {
1930 if (part->part_type == RSPAMD_MIME_PART_UNDEFINED) {
1931 if (part->parsed_data.len > 0) {
1932 if (rspamd_archive_cheat_detect (part, "zip",
1933 zip_magic, sizeof (zip_magic))) {
1934 rspamd_archive_process_zip (task, part);
1935 }
1936 else if (rspamd_archive_cheat_detect (part, "rar",
1937 rar_magic, sizeof (rar_magic))) {
1938 rspamd_archive_process_rar (task, part);
1939 }
1940 else if (rspamd_archive_cheat_detect (part, "7z",
1941 sz_magic, sizeof (sz_magic))) {
1942 rspamd_archive_process_7zip (task, part);
1943 }
1944 else if (rspamd_archive_cheat_detect (part, "gz",
1945 gz_magic, sizeof (gz_magic))) {
1946 rspamd_archive_process_gzip (task, part);
1947 }
1948
1949 if (part->ct && (part->ct->flags & RSPAMD_CONTENT_TYPE_TEXT) &&
1950 part->part_type == RSPAMD_MIME_PART_ARCHIVE &&
1951 part->specific.arch) {
1952 struct rspamd_archive *arch = part->specific.arch;
1953
1954 msg_info_task ("found %s archive with incorrect content-type: %T/%T",
1955 rspamd_archive_type_str (arch->type),
1956 &part->ct->type, &part->ct->subtype);
1957
1958 if (!(part->ct->flags & RSPAMD_CONTENT_TYPE_MISSING)) {
1959 part->ct->flags |= RSPAMD_CONTENT_TYPE_BROKEN;
1960 }
1961 }
1962 }
1963 }
1964 }
1965 }
1966
1967
1968 const gchar *
rspamd_archive_type_str(enum rspamd_archive_type type)1969 rspamd_archive_type_str (enum rspamd_archive_type type)
1970 {
1971 const gchar *ret = "unknown";
1972
1973 switch (type) {
1974 case RSPAMD_ARCHIVE_ZIP:
1975 ret = "zip";
1976 break;
1977 case RSPAMD_ARCHIVE_RAR:
1978 ret = "rar";
1979 break;
1980 case RSPAMD_ARCHIVE_7ZIP:
1981 ret = "7z";
1982 break;
1983 case RSPAMD_ARCHIVE_GZIP:
1984 ret = "gz";
1985 break;
1986 }
1987
1988 return ret;
1989 }
1990