1 /*
2 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
3 * Copyright (C) 2009-2013 Sourcefire, Inc.
4 *
5 * Authors: Tomasz Kojm <tkojm@clamav.net>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include <stdio.h>
23 #include <string.h>
24 #ifdef HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <fcntl.h>
31
32 #include "clamav.h"
33 #include "others.h"
34 #include "macho.h"
35 #include "execs.h"
36 #include "scanners.h"
37
/*
 * Remove the temporary file named by the caller's local `tempfile` unless the
 * engine is configured to keep temporary files.  If the unlink fails, the
 * macro frees `tempfile` and makes the ENCLOSING function return CL_EUNLINK.
 * Expects `ctx` and `tempfile` to be in scope at the expansion site.
 *
 * Wrapped in do { } while (0) so that `CLI_TMPUNLK();` is a single statement
 * and cannot capture a following `else`.
 */
#define CLI_TMPUNLK()                 \
    do {                              \
        if (!ctx->engine->keeptmp) {  \
            if (cli_unlink(tempfile)) { \
                free(tempfile);       \
                return CL_EUNLINK;    \
            }                         \
        }                             \
    } while (0)
45
/*
 * Endianness conversion helpers: return `v` byte-swapped when `conv` is
 * non-zero, otherwise `v` unchanged.  Arguments are parenthesised so that
 * arbitrary expressions can be passed safely.
 */
#define EC32(v, conv) ((conv) ? cbswap32(v) : (v))
#define EC64(v, conv) ((conv) ? cbswap64(v) : (v))
48
/* Leading part of a Mach-O file header, read verbatim from offset 0.
 * 64-bit images carry one extra 32-bit word after `flags`; the parser
 * accounts for it by skipping 4 more bytes. */
struct macho_hdr {
    uint32_t magic;       /* 0xfeedface / 0xfeedfacf, or their byte-swapped forms */
    uint32_t cpu_type;    /* CPU identifier; decides which thread-state layout applies */
    uint32_t cpu_subtype; /* not interpreted in this file */
    uint32_t filetype;    /* object, executable, dylib, ... (only logged) */
    uint32_t ncmds;       /* number of load commands following the header */
    uint32_t sizeofcmds;  /* total size of the load commands (only logged) */
    uint32_t flags;       /* not interpreted in this file */
};
58
/* Generic prefix shared by every load command. */
struct macho_load_cmd {
    uint32_t cmd;     /* command type (e.g. 0x01 / 0x19 segment, 0x4 / 0x5 thread) */
    uint32_t cmdsize; /* size of the whole command, including this prefix */
};
63
/* Body of a 32-bit segment load command (follows struct macho_load_cmd). */
struct macho_segment_cmd {
    char segname[16];  /* segment name; may occupy all 16 bytes without a NUL */
    uint32_t vmaddr;
    uint32_t vmsize;
    uint32_t fileoff;
    uint32_t filesize;
    uint32_t maxprot;
    uint32_t initprot;
    uint32_t nsects;   /* number of section records that follow this command */
    uint32_t flags;
};
75
/* Body of a 64-bit segment load command (follows struct macho_load_cmd). */
struct macho_segment_cmd64 {
    char segname[16];  /* segment name; may occupy all 16 bytes without a NUL */
    uint64_t vmaddr;
    uint64_t vmsize;
    uint64_t fileoff;
    uint64_t filesize;
    uint32_t maxprot;
    uint32_t initprot;
    uint32_t nsects;   /* number of section records that follow this command */
    uint32_t flags;
};
87
/* Section record inside a 32-bit segment command. */
struct macho_section {
    char sectname[16]; /* section name; may occupy all 16 bytes without a NUL */
    char segname[16];  /* name of the owning segment */
    uint32_t addr;     /* virtual address of the section */
    uint32_t size;     /* virtual size of the section */
    uint32_t offset;   /* file offset of the section data */
    uint32_t align;    /* alignment expressed as a power-of-two exponent */
    uint32_t reloff;
    uint32_t nreloc;
    uint32_t flags;
    uint32_t res1;
    uint32_t res2;
};
101
/* Section record inside a 64-bit segment command. */
struct macho_section64 {
    char sectname[16]; /* section name; may occupy all 16 bytes without a NUL */
    char segname[16];  /* name of the owning segment */
    uint64_t addr;     /* virtual address of the section */
    uint64_t size;     /* virtual size of the section */
    uint32_t offset;   /* file offset of the section data */
    uint32_t align;    /* alignment expressed as a power-of-two exponent */
    uint32_t reloff;
    uint32_t nreloc;
    uint32_t flags;
    uint32_t res1;
    uint32_t res2;
};
115
/* Register image stored in a thread load command for 32-bit PowerPC. */
struct macho_thread_state_ppc {
    uint32_t srr0;    /* PC: used as the entry point */
    uint32_t srr1;
    uint32_t reg[32]; /* general purpose registers */
    uint32_t cr;
    uint32_t xer;
    uint32_t lr;
    uint32_t ctr;
    uint32_t mq;
    uint32_t vrsave;
};
127
/* Register image stored in a thread load command for 64-bit PowerPC.
 * Only `srr0` (at offset 0) is consumed by the parser. */
struct macho_thread_state_ppc64 {
    uint64_t srr0;    /* PC: used as the entry point */
    uint64_t srr1;
    uint64_t reg[32]; /* general purpose registers */
    uint32_t cr;
    uint64_t xer;
    uint64_t lr;
    uint64_t ctr;
    uint32_t vrsave;
};
138
/* Register image stored in a thread load command for 32-bit x86. */
struct macho_thread_state_x86 {
    /* general purpose registers */
    uint32_t eax;
    uint32_t ebx;
    uint32_t ecx;
    uint32_t edx;
    uint32_t edi;
    uint32_t esi;
    uint32_t ebp;
    uint32_t esp;
    uint32_t ss;
    uint32_t eflags;
    uint32_t eip; /* instruction pointer */
    /* remaining segment registers */
    uint32_t cs;
    uint32_t ds;
    uint32_t es;
    uint32_t fs;
    uint32_t gs;
};
157
/* Header of a universal ("fat") binary, read from offset 0. */
struct macho_fat_header {
    uint32_t magic; /* 0xcafebabe, or 0xbebafeca when byte-swapped */
    uint32_t nfats; /* number of macho_fat_arch records that follow */
};
162
/* Per-architecture record of a universal binary. */
struct macho_fat_arch {
    uint32_t cputype;
    uint32_t cpusubtype;
    uint32_t offset; /* file offset of the embedded image */
    uint32_t size;   /* size in bytes of the embedded image */
    uint32_t align;
};
170
/*
 * Leave the ENCLOSING function because the file is malformed.
 *   - matcher mode (local `matcher` non-zero): return -1.
 *   - otherwise, if broken-executable heuristics are enabled and appending
 *     "Heuristics.Broken.Executable" reports CL_VIRUS: return CL_VIRUS.
 *   - otherwise: return CL_EFORMAT.
 * Expects `matcher` and `ctx` to be in scope at the expansion site.
 *
 * Wrapped in do { } while (0) so that `RETURN_BROKEN;` is always a single
 * statement, even as the body of an unbraced if/else.
 */
#define RETURN_BROKEN                                                              \
    do {                                                                           \
        if (matcher)                                                               \
            return -1;                                                             \
        if (SCAN_HEURISTIC_BROKEN) {                                               \
            if (CL_VIRUS == cli_append_virus(ctx, "Heuristics.Broken.Executable")) \
                return CL_VIRUS;                                                   \
        }                                                                          \
        return CL_EFORMAT;                                                         \
    } while (0)
179
cli_rawaddr(uint32_t vaddr,struct cli_exe_section * sects,uint16_t nsects,unsigned int * err)180 static uint32_t cli_rawaddr(uint32_t vaddr, struct cli_exe_section *sects, uint16_t nsects, unsigned int *err)
181 {
182 unsigned int i, found = 0;
183
184 for (i = 0; i < nsects; i++) {
185 if (sects[i].rva <= vaddr && sects[i].rva + sects[i].vsz > vaddr) {
186 found = 1;
187 break;
188 }
189 }
190
191 if (!found) {
192 *err = 1;
193 return 0;
194 }
195
196 *err = 0;
197 return vaddr - sects[i].rva + sects[i].raw;
198 }
199
/**
 * Parse a single-architecture Mach-O image held in ctx->fmap.
 *
 * The function walks the load commands, collects every section of every
 * segment command into a section table and, for x86 / PowerPC images, takes
 * the entry point from the thread load command.
 *
 * Two modes are selected by `fileinfo`:
 *   - fileinfo == NULL (scan mode): details are logged with cli_dbgmsg, the
 *     section table is freed before returning.
 *   - fileinfo != NULL (matcher mode): logging of file details is suppressed
 *     and, on success, fileinfo->ep, ->nsections and ->sections are filled
 *     in.  The section table is NOT freed here in that case.
 *
 * @param ctx       Scan context; ctx->fmap is the file being parsed.
 * @param fileinfo  Optional output structure (see above).
 * @return Matcher mode: 0 on success, -1 on any failure.
 *         Scan mode: CL_SUCCESS, CL_EFORMAT for malformed input, CL_VIRUS
 *         when the broken-executable heuristic fires, CL_EMEM or CL_EARG.
 */
int cli_scanmacho(cli_ctx *ctx, struct cli_exe_info *fileinfo)
{
    struct macho_hdr hdr;
    struct macho_load_cmd load_cmd;
    struct macho_segment_cmd segment_cmd;
    struct macho_segment_cmd64 segment_cmd64;
    struct macho_section section;
    struct macho_section64 section64;
    unsigned int i, j, sect = 0, conv, m64, nsects, matcher = 0;
    unsigned int arch = 0, ep = 0, err;
    struct cli_exe_section *sections = NULL;
    char name[16];
    fmap_t *map = ctx->fmap;
    ssize_t at;

    if (fileinfo) {
        matcher = 1;

        // TODO This code assumes fileinfo->offset == 0, which might not always
        // be the case. For now just print this debug message and continue on
        if (0 != fileinfo->offset) {
            cli_dbgmsg("cli_scanmacho: Assumption Violated: fileinfo->offset != 0\n");
        }
    }

    if (fmap_readn(map, &hdr, 0, sizeof(hdr)) != sizeof(hdr)) {
        cli_dbgmsg("cli_scanmacho: Can't read header\n");
        return matcher ? -1 : CL_EFORMAT;
    }
    at = sizeof(hdr);

    /* The magic tells both the word size (m64) and whether every multi-byte
     * field must be byte-swapped (conv). */
    if (hdr.magic == 0xfeedface) {
        conv = 0;
        m64  = 0;
    } else if (hdr.magic == 0xcefaedfe) {
        conv = 1;
        m64  = 0;
    } else if (hdr.magic == 0xfeedfacf) {
        conv = 0;
        m64  = 1;
    } else if (hdr.magic == 0xcffaedfe) {
        conv = 1;
        m64  = 1;
    } else {
        cli_dbgmsg("cli_scanmacho: Incorrect magic\n");
        return matcher ? -1 : CL_EFORMAT;
    }

    /* arch selects the thread-state layout: 1 = x86, 2 = PPC, 3 = PPC64,
     * 0 = no entry point extraction. */
    switch (EC32(hdr.cpu_type, conv)) {
        case 7:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: Intel 32-bit\n");
            arch = 1;
            break;
        case 7 | 0x1000000:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: Intel 64-bit\n");
            break;
        case 12:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: ARM\n");
            break;
        case 14:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: SPARC\n");
            break;
        case 18:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: POWERPC 32-bit\n");
            arch = 2;
            break;
        case 18 | 0x1000000:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: POWERPC 64-bit\n");
            arch = 3;
            break;
        default:
            if (!matcher)
                cli_dbgmsg("MACHO: CPU Type: ** UNKNOWN ** (%u)\n", EC32(hdr.cpu_type, conv));
            break;
    }

    if (!matcher) {
        switch (EC32(hdr.filetype, conv)) {
            case 0x1: /* MH_OBJECT */
                cli_dbgmsg("MACHO: Filetype: Relocatable object file\n");
                break;
            case 0x2: /* MH_EXECUTE */
                cli_dbgmsg("MACHO: Filetype: Executable\n");
                break;
            case 0x3: /* MH_FVMLIB */
                cli_dbgmsg("MACHO: Filetype: Fixed VM shared library file\n");
                break;
            case 0x4: /* MH_CORE */
                cli_dbgmsg("MACHO: Filetype: Core file\n");
                break;
            case 0x5: /* MH_PRELOAD */
                cli_dbgmsg("MACHO: Filetype: Preloaded executable file\n");
                break;
            case 0x6: /* MH_DYLIB */
                cli_dbgmsg("MACHO: Filetype: Dynamically bound shared library\n");
                break;
            case 0x7: /* MH_DYLINKER */
                cli_dbgmsg("MACHO: Filetype: Dynamic link editor\n");
                break;
            case 0x8: /* MH_BUNDLE */
                cli_dbgmsg("MACHO: Filetype: Dynamically bound bundle file\n");
                break;
            case 0x9: /* MH_DYLIB_STUB */
                cli_dbgmsg("MACHO: Filetype: Shared library stub for static\n");
                break;
            default:
                cli_dbgmsg("MACHO: Filetype: ** UNKNOWN ** (0x%x)\n", EC32(hdr.filetype, conv));
        }

        cli_dbgmsg("MACHO: Number of load commands: %u\n", EC32(hdr.ncmds, conv));
        cli_dbgmsg("MACHO: Size of load commands: %u\n", EC32(hdr.sizeofcmds, conv));
    }

    /* The 64-bit header has one extra 32-bit word after `flags`. */
    if (m64)
        at += 4;

    hdr.ncmds = EC32(hdr.ncmds, conv);
    if (!hdr.ncmds || hdr.ncmds > 1024) {
        cli_dbgmsg("cli_scanmacho: Invalid number of load commands (%u)\n", hdr.ncmds);
        RETURN_BROKEN;
    }

    for (i = 0; i < hdr.ncmds; i++) {
        if (fmap_readn(map, &load_cmd, at, sizeof(load_cmd)) != sizeof(load_cmd)) {
            cli_dbgmsg("cli_scanmacho: Can't read load command\n");
            free(sections);
            RETURN_BROKEN;
        }
        at += sizeof(load_cmd);

        load_cmd.cmd = EC32(load_cmd.cmd, conv);
        if ((m64 && load_cmd.cmd == 0x19) || (!m64 && load_cmd.cmd == 0x01)) { /* LC_SEGMENT */
            if (m64) {
                if (fmap_readn(map, &segment_cmd64, at, sizeof(segment_cmd64)) != sizeof(segment_cmd64)) {
                    cli_dbgmsg("cli_scanmacho: Can't read segment command\n");
                    free(sections);
                    RETURN_BROKEN;
                }
                at += sizeof(segment_cmd64);
                nsects = EC32(segment_cmd64.nsects, conv);
                strncpy(name, segment_cmd64.segname, sizeof(name));
                name[sizeof(name) - 1] = '\0';
            } else {
                if (fmap_readn(map, &segment_cmd, at, sizeof(segment_cmd)) != sizeof(segment_cmd)) {
                    cli_dbgmsg("cli_scanmacho: Can't read segment command\n");
                    free(sections);
                    RETURN_BROKEN;
                }
                at += sizeof(segment_cmd);
                nsects = EC32(segment_cmd.nsects, conv);
                strncpy(name, segment_cmd.segname, sizeof(name));
                name[sizeof(name) - 1] = '\0';
            }
            if (!matcher) {
                cli_dbgmsg("MACHO: Segment name: %s\n", name);
                cli_dbgmsg("MACHO: Number of sections: %u\n", nsects);
            }
            if (nsects > 255) {
                cli_dbgmsg("cli_scanmacho: Invalid number of sections\n");
                free(sections);
                RETURN_BROKEN;
            }
            if (!nsects) {
                if (!matcher)
                    cli_dbgmsg("MACHO: ------------------\n");
                continue;
            }

            /* Grow the table so it can hold this segment's sections too. */
            sections = (struct cli_exe_section *)cli_realloc2(sections, (sect + nsects) * sizeof(struct cli_exe_section));
            if (!sections) {
                cli_errmsg("cli_scanmacho: Can't allocate memory for 'sections'\n");
                return matcher ? -1 : CL_EMEM;
            }

            for (j = 0; j < nsects; j++) {
                if (m64) {
                    if (fmap_readn(map, &section64, at, sizeof(section64)) != sizeof(section64)) {
                        cli_dbgmsg("cli_scanmacho: Can't read section\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    at += sizeof(section64);
                    sections[sect].rva = EC64(section64.addr, conv);
                    sections[sect].vsz = EC64(section64.size, conv);
                    sections[sect].raw = EC32(section64.offset, conv);
                    /* The alignment is a shift count taken from the file;
                     * shifting a 32-bit value by 32 or more is undefined. */
                    if (EC32(section64.align, conv) >= 32) {
                        cli_dbgmsg("cli_scanmacho: Section aligned is malformed\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    section64.align    = 1u << EC32(section64.align, conv);
                    sections[sect].rsz = sections[sect].vsz + (section64.align - (sections[sect].vsz % section64.align)) % section64.align; /* most likely we can assume it's the same as .vsz */
                    strncpy(name, section64.sectname, sizeof(name));
                    name[sizeof(name) - 1] = '\0';
                } else {
                    if (fmap_readn(map, &section, at, sizeof(section)) != sizeof(section)) {
                        cli_dbgmsg("cli_scanmacho: Can't read section\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    at += sizeof(section);
                    sections[sect].rva = EC32(section.addr, conv);
                    sections[sect].vsz = EC32(section.size, conv);
                    sections[sect].raw = EC32(section.offset, conv);
                    if (EC32(section.align, conv) >= 32) {
                        cli_dbgmsg("cli_scanmacho: Section aligned is malformed\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    section.align      = 1u << EC32(section.align, conv);
                    sections[sect].rsz = sections[sect].vsz + (section.align - (sections[sect].vsz % section.align)) % section.align;
                    strncpy(name, section.sectname, sizeof(name));
                    name[sizeof(name) - 1] = '\0';
                }
                if (!matcher) {
                    cli_dbgmsg("MACHO: --- Section %u ---\n", sect);
                    cli_dbgmsg("MACHO: Name: %s\n", name);
                    cli_dbgmsg("MACHO: Virtual address: 0x%x\n", (unsigned int)sections[sect].rva);
                    cli_dbgmsg("MACHO: Virtual size: %u\n", (unsigned int)sections[sect].vsz);
                    cli_dbgmsg("MACHO: Raw size: %u\n", (unsigned int)sections[sect].rsz);
                    if (sections[sect].raw)
                        cli_dbgmsg("MACHO: File offset: %u\n", (unsigned int)sections[sect].raw);
                }
                sect++;
            }
            if (!matcher)
                cli_dbgmsg("MACHO: ------------------\n");

        } else if (arch && (load_cmd.cmd == 0x4 || load_cmd.cmd == 0x5)) { /* LC_(UNIX)THREAD */
            /* Skip the two 32-bit words that precede the register image. */
            at += 8;
            switch (arch) {
                case 1: /* x86 */
                {
                    struct macho_thread_state_x86 thread_state_x86;

                    if (fmap_readn(map, &thread_state_x86, at, sizeof(thread_state_x86)) != sizeof(thread_state_x86)) {
                        cli_dbgmsg("cli_scanmacho: Can't read thread_state_x86\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    at += sizeof(thread_state_x86);
                    /* The instruction pointer is the entry point, just as
                     * srr0 is for the PowerPC variants below. */
                    ep = EC32(thread_state_x86.eip, conv);
                    break;
                }

                case 2: /* PPC */
                {
                    struct macho_thread_state_ppc thread_state_ppc;

                    if (fmap_readn(map, &thread_state_ppc, at, sizeof(thread_state_ppc)) != sizeof(thread_state_ppc)) {
                        cli_dbgmsg("cli_scanmacho: Can't read thread_state_ppc\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    at += sizeof(thread_state_ppc);
                    ep = EC32(thread_state_ppc.srr0, conv);
                    break;
                }

                case 3: /* PPC64 */
                {
                    struct macho_thread_state_ppc64 thread_state_ppc64;

                    if (fmap_readn(map, &thread_state_ppc64, at, sizeof(thread_state_ppc64)) != sizeof(thread_state_ppc64)) {
                        cli_dbgmsg("cli_scanmacho: Can't read thread_state_ppc64\n");
                        free(sections);
                        RETURN_BROKEN;
                    }
                    at += sizeof(thread_state_ppc64);
                    ep = EC64(thread_state_ppc64.srr0, conv);
                    break;
                }
                default:
                    cli_errmsg("cli_scanmacho: Invalid arch setting!\n");
                    free(sections);
                    return matcher ? -1 : CL_EARG;
            }
        } else {
            /* Any other command: skip its payload using the declared size. */
            if (EC32(load_cmd.cmdsize, conv) > sizeof(load_cmd))
                at += EC32(load_cmd.cmdsize, conv) - sizeof(load_cmd);
        }
    }

    if (ep) {
        if (!matcher)
            cli_dbgmsg("Entry Point: 0x%x\n", ep);
        if (sections) {
            /* Convert the virtual entry point into a file offset. */
            ep = cli_rawaddr(ep, sections, sect, &err);
            if (err) {
                cli_dbgmsg("cli_scanmacho: Can't calculate EP offset\n");
                free(sections);
                return matcher ? -1 : CL_EFORMAT;
            }
            if (!matcher)
                cli_dbgmsg("Entry Point file offset: %u\n", ep);
        }
    }

    if (matcher) {
        /* The section table is handed over to fileinfo, not freed here. */
        fileinfo->ep        = ep;
        fileinfo->nsections = sect;
        fileinfo->sections  = sections;
        return 0;
    } else {
        free(sections);
        return CL_SUCCESS;
    }
}
513
/**
 * Thin entry point that forwards to cli_scanmacho() with the same arguments
 * and returns its result unchanged.
 *
 * @param ctx       Scan context forwarded as-is.
 * @param fileinfo  Output structure forwarded as-is.
 * @return Whatever cli_scanmacho() returns.
 */
int cli_machoheader(cli_ctx *ctx, struct cli_exe_info *fileinfo)
{
    int status = cli_scanmacho(ctx, fileinfo);

    return status;
}
518
/**
 * Scan a universal ("fat") Mach-O binary held in ctx->fmap.
 *
 * Reads the fat header, then for every architecture record hands the
 * referenced region of the map to cli_magic_scan_nested_fmap_type().
 * Scanning stops at the first embedded image that yields CL_VIRUS.
 *
 * @param ctx  Scan context; ctx->fmap is the file being parsed.
 * @return CL_EFORMAT for an unreadable header, bad magic or too many
 *         architectures; CL_CLEAN when the file looks like Java bytecode
 *         (same magic) or contains no architectures; CL_VIRUS / CL_EFORMAT
 *         via RETURN_BROKEN for malformed records; otherwise the result of
 *         the last nested scan.
 */
int cli_scanmacho_unibin(cli_ctx *ctx)
{
    struct macho_fat_header fat_header;
    struct macho_fat_arch fat_arch;
    /* `matcher` is always 0 here; it exists because RETURN_BROKEN reads it. */
    unsigned int conv, i, matcher = 0;
    int ret     = CL_CLEAN;
    fmap_t *map = ctx->fmap;
    ssize_t at;

    if (fmap_readn(map, &fat_header, 0, sizeof(fat_header)) != sizeof(fat_header)) {
        cli_dbgmsg("cli_scanmacho_unibin: Can't read fat_header\n");
        return CL_EFORMAT;
    }
    at = sizeof(fat_header);

    if (fat_header.magic == 0xcafebabe) {
        conv = 0;
    } else if (fat_header.magic == 0xbebafeca) {
        conv = 1;
    } else {
        cli_dbgmsg("cli_scanmacho_unibin: Incorrect magic\n");
        return CL_EFORMAT;
    }

    fat_header.nfats = EC32(fat_header.nfats, conv);
    if ((fat_header.nfats & 0xffff) >= 39) /* Java Bytecode */
        return CL_CLEAN;

    if (fat_header.nfats > 32) {
        cli_dbgmsg("cli_scanmacho_unibin: Invalid number of architectures\n");
        return CL_EFORMAT;
    }
    cli_dbgmsg("UNIBIN: Number of architectures: %u\n", (unsigned int)fat_header.nfats);
    for (i = 0; i < fat_header.nfats; i++) {
        if (fmap_readn(map, &fat_arch, at, sizeof(fat_arch)) != sizeof(fat_arch)) {
            cli_dbgmsg("cli_scanmacho_unibin: Can't read fat_arch\n");
            RETURN_BROKEN;
        }
        at += sizeof(fat_arch);
        fat_arch.offset = EC32(fat_arch.offset, conv);
        fat_arch.size   = EC32(fat_arch.size, conv);
        cli_dbgmsg("UNIBIN: Binary %u of %u\n", i + 1, fat_header.nfats);
        cli_dbgmsg("UNIBIN: File offset: %u\n", fat_arch.offset);
        cli_dbgmsg("UNIBIN: File size: %u\n", fat_arch.size);

        /* The offset must be greater than the location of the header or we risk
           re-scanning the same data over and over again. The scan recursion max
           will save us, but it will still cause other problems and waste CPU. */
        if (fat_arch.offset < at) {
            /* offset is unsigned: print it with %u so large values are not
             * shown as negative numbers. */
            cli_dbgmsg("Invalid fat offset: %u\n", fat_arch.offset);
            RETURN_BROKEN;
        }

        ret = cli_magic_scan_nested_fmap_type(map, fat_arch.offset, fat_arch.size, ctx, CL_TYPE_ANY, NULL);
        if (ret == CL_VIRUS)
            break;
    }

    return ret; /* result from the last binary */
}
579
/**
 * Run the BC_MACHO_UNPACKER bytecode hook on ctx->fmap and, if the hook
 * produced a rebuilt file, scan that file.
 *
 * @param ctx  Scan context.
 * @return CL_EMEM if the bytecode context cannot be allocated;
 *         CL_VIRUS if the hook or the scan of the rebuilt file reports a
 *         detection; CL_SUCCESS if a rebuilt file was scanned without a
 *         detection; CL_EUNLINK if removing the temporary file fails;
 *         CL_CLEAN in every other case.
 */
int cli_unpackmacho(cli_ctx *ctx)
{
    char *tempfile = NULL;
    int ndesc;
    struct cli_bc_ctx *bc_ctx;
    int ret;

    /* Bytecode BC_MACHO_UNPACKER hook */
    bc_ctx = cli_bytecode_context_alloc();
    if (!bc_ctx) {
        cli_errmsg("cli_unpackmacho: can't allocate memory for bc_ctx\n");
        return CL_EMEM;
    }

    cli_bytecode_context_setctx(bc_ctx, ctx);

    ret = cli_bytecode_runhook(ctx, ctx->engine, bc_ctx, BC_MACHO_UNPACKER, ctx->fmap);
    switch (ret) {
        case CL_VIRUS:
            cli_bytecode_context_destroy(bc_ctx);
            return CL_VIRUS;
        case CL_SUCCESS:
            /* Fetch the hook's output file before the context is destroyed. */
            ndesc = cli_bytecode_context_getresult_file(bc_ctx, &tempfile);
            cli_bytecode_context_destroy(bc_ctx);
            if (ndesc != -1 && tempfile) {
                if (ctx->engine->keeptmp)
                    cli_dbgmsg("cli_scanmacho: Unpacked and rebuilt executable saved in %s\n", tempfile);
                else
                    cli_dbgmsg("cli_scanmacho: Unpacked and rebuilt executable\n");
                lseek(ndesc, 0, SEEK_SET);
                cli_dbgmsg("***** Scanning rebuilt Mach-O file *****\n");
                ret = cli_magic_scan_desc(ndesc, tempfile, ctx, NULL);

                /* Cleanup is the same whatever the scan found.  CLI_TMPUNLK
                 * frees tempfile and returns CL_EUNLINK itself on failure. */
                close(ndesc);
                CLI_TMPUNLK();
                free(tempfile);
                return (ret == CL_VIRUS) ? CL_VIRUS : CL_SUCCESS;
            }
            break;
        default:
            cli_bytecode_context_destroy(bc_ctx);
    }

    return CL_CLEAN;
}
629