1 /*
2 * Load, and verify ClamAV bytecode.
3 *
4 * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5 * Copyright (C) 2009-2013 Sourcefire, Inc.
6 *
7 * Authors: Török Edvin
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License version 2 as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21 * MA 02110-1301, USA.
22 */
23
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27
28 #include <string.h>
29 #include <assert.h>
30 #include <fcntl.h>
31
32 #if HAVE_JSON
33 #include "json.h"
34 #endif
35
36 #include "dconf.h"
37 #include "clamav.h"
38 #include "others.h"
39 #include "pe.h"
40 #include "bytecode.h"
41 #include "bytecode_priv.h"
42 #include "bytecode_detect.h"
43 #include "readdb.h"
44 #include "scanners.h"
45 #include "bytecode_api.h"
46 #include "bytecode_api_impl.h"
47 #include "builtin_bytecodes.h"
48
/* Default for MAX_TRACKED_BC; a build may define its own value beforehand. */
#ifndef MAX_TRACKED_BC
#define MAX_TRACKED_BC 64
#endif
#define BC_EVENTS_PER_SIG 2
/* Product of the two limits above. The expansion is fully parenthesized so
 * it keeps its value inside larger expressions such as
 * `x / MAX_BC_SIGEVENT_ID`, which the bare `A *B` form would evaluate as
 * `(x / A) * B`. */
#define MAX_BC_SIGEVENT_ID ((MAX_TRACKED_BC) * (BC_EVENTS_PER_SIG))
54
/* File-level globals with external linkage: an event table pointer (starts
 * out NULL) and a signature id. Nothing in the visible part of this file
 * reads or writes them.
 * NOTE(review): presumably used for per-signature bytecode event tracking
 * together with MAX_BC_SIGEVENT_ID - confirm against their users. */
cli_events_t *g_sigevents = NULL;
unsigned int g_sigid;
57
/* dummy values */

/* Fallback for hooks.match_counts (installed by context_safe()): five
 * recognizable marker words, then zeros up to 64 entries. */
static const uint32_t nomatch[64] = {
    0xdeadbeef,
    0xdeaddead,
    0xbeefdead,
    0xdeaddead,
    0xdeadbeef,
    /* the remaining 59 entries are implicitly zero-initialized */
};
68 static const uint32_t nooffsets[64] = {
69 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
70 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
71 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
72 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
73 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
74 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
75 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
76 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
77 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
78 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
79 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
80 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
81 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
82 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
83 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
84 CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE};
85
86 static const uint16_t nokind;
87 static const uint32_t nofilesize;
88 static const struct cli_pe_hook_data nopedata;
89
context_safe(struct cli_bc_ctx * ctx)90 static void context_safe(struct cli_bc_ctx *ctx)
91 {
92 /* make sure these are never NULL */
93 if (!ctx->hooks.kind)
94 ctx->hooks.kind = &nokind;
95 if (!ctx->hooks.match_counts)
96 ctx->hooks.match_counts = nomatch;
97 if (!ctx->hooks.match_offsets)
98 ctx->hooks.match_offsets = nooffsets;
99 if (!ctx->hooks.filesize)
100 ctx->hooks.filesize = &nofilesize;
101 if (!ctx->hooks.pedata)
102 ctx->hooks.pedata = &nopedata;
103 }
104
105 static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx);
cli_bytecode_context_alloc(void)106 struct cli_bc_ctx *cli_bytecode_context_alloc(void)
107 {
108 struct cli_bc_ctx *ctx = cli_calloc(1, sizeof(*ctx));
109 if (!ctx) {
110 cli_errmsg("Out of memory allocating cli_bytecode_context_reset\n");
111 return NULL;
112 }
113 ctx->bytecode_timeout = 60000;
114 cli_bytecode_context_reset(ctx);
115 return ctx;
116 }
117
/*
 * Release a bytecode context: first clear it (which resets and zeroes the
 * structure), then free the structure itself. ctx must not be used afterwards.
 */
void cli_bytecode_context_destroy(struct cli_bc_ctx *ctx)
{
    (void)cli_bytecode_context_clear(ctx);

    free(ctx);
}
123
cli_bytecode_context_getresult_file(struct cli_bc_ctx * ctx,char ** tempfilename)124 int cli_bytecode_context_getresult_file(struct cli_bc_ctx *ctx, char **tempfilename)
125 {
126 int fd;
127 *tempfilename = ctx->tempfile;
128 fd = ctx->outfd;
129 ctx->tempfile = NULL;
130 ctx->outfd = 0;
131 return fd;
132 }
133
134 /* resets bytecode state, so you can run another bytecode with same ctx */
cli_bytecode_context_reset(struct cli_bc_ctx * ctx)135 static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx)
136 {
137 unsigned i;
138
139 free(ctx->opsizes);
140 ctx->opsizes = NULL;
141
142 free(ctx->values);
143 ctx->values = NULL;
144
145 free(ctx->operands);
146 ctx->operands = NULL;
147
148 if (ctx->outfd) {
149 cli_ctx *cctx = ctx->ctx;
150 if (ctx->outfd)
151 close(ctx->outfd);
152 if (ctx->tempfile && (!cctx || !cctx->engine->keeptmp)) {
153 cli_unlink(ctx->tempfile);
154 }
155 free(ctx->tempfile);
156 ctx->tempfile = NULL;
157 ctx->outfd = 0;
158 }
159 if (ctx->jsnormdir) {
160 char fullname[1025];
161 cli_ctx *cctx = ctx->ctx;
162 int fd, ret = CL_CLEAN;
163
164 if (!ctx->found) {
165 snprintf(fullname, 1024, "%s" PATHSEP "javascript", ctx->jsnormdir);
166 fd = open(fullname, O_RDONLY | O_BINARY);
167 if (fd >= 0) {
168 cctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
169
170 ret = cli_scan_desc(fd, cctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL);
171 if (ret == CL_CLEAN) {
172 if (lseek(fd, 0, SEEK_SET) == -1)
173 cli_dbgmsg("cli_bytecode: call to lseek() has failed\n");
174 else {
175 cctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
176
177 ret = cli_scan_desc(fd, cctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL);
178 }
179 }
180 close(fd);
181 }
182 }
183 if (!cctx || !cctx->engine->keeptmp) {
184 cli_rmdirs(ctx->jsnormdir);
185 }
186 free(ctx->jsnormdir);
187 if (ret != CL_CLEAN)
188 ctx->found = 1;
189 }
190 ctx->numParams = 0;
191 ctx->funcid = 0;
192 /* don't touch fmap, file_size, and hooks, sections, ctx, timeout, pdf* */
193 ctx->off = 0;
194 ctx->written = 0;
195 ctx->jsnormwritten = 0;
196 #if USE_MPOOL
197 if (ctx->mpool) {
198 mpool_destroy(ctx->mpool);
199 ctx->mpool = NULL;
200 }
201 #else
202 /*TODO: implement for no-mmap case too*/
203 #endif
204 for (i = 0; i < ctx->ninflates; i++)
205 cli_bcapi_inflate_done(ctx, i);
206 free(ctx->inflates);
207 ctx->inflates = NULL;
208 ctx->ninflates = 0;
209
210 for (i = 0; i < ctx->nlzmas; i++)
211 cli_bcapi_lzma_done(ctx, i);
212 free(ctx->lzmas);
213 ctx->lzmas = NULL;
214 ctx->nlzmas = 0;
215
216 #if HAVE_BZLIB_H
217 for (i = 0; i < ctx->nbzip2s; i++)
218 cli_bcapi_bzip2_done(ctx, i);
219 free(ctx->bzip2s);
220 ctx->bzip2s = NULL;
221 ctx->nbzip2s = 0;
222 #endif
223
224 for (i = 0; i < ctx->nbuffers; i++)
225 cli_bcapi_buffer_pipe_done(ctx, i);
226 free(ctx->buffers);
227 ctx->buffers = NULL;
228 ctx->nbuffers = 0;
229
230 for (i = 0; i < ctx->nhashsets; i++)
231 cli_bcapi_hashset_done(ctx, i);
232 free(ctx->hashsets);
233 ctx->hashsets = NULL;
234 ctx->nhashsets = 0;
235
236 for (i = 0; i < ctx->njsnorms; i++)
237 cli_bcapi_jsnorm_done(ctx, i);
238 free(ctx->jsnorms);
239 ctx->jsnorms = NULL;
240 ctx->njsnorms = 0;
241 ctx->jsnormdir = NULL;
242
243 for (i = 0; i < ctx->nmaps; i++)
244 cli_bcapi_map_done(ctx, i);
245 free(ctx->maps);
246 ctx->maps = NULL;
247 ctx->nmaps = 0;
248
249 /* Use input_switch() to free the extracted file fmap, if one exists */
250 cli_bcapi_input_switch(ctx, 0);
251
252 #if HAVE_JSON
253 free((json_object **)(ctx->jsonobjs));
254 ctx->jsonobjs = NULL;
255 ctx->njsonobjs = 0;
256 #endif
257
258 ctx->containertype = CL_TYPE_ANY;
259 return CL_SUCCESS;
260 }
261
cli_bytecode_context_clear(struct cli_bc_ctx * ctx)262 int cli_bytecode_context_clear(struct cli_bc_ctx *ctx)
263 {
264 cli_bytecode_context_reset(ctx);
265 memset(ctx, 0, sizeof(*ctx));
266 return CL_SUCCESS;
267 }
268
typesize(const struct cli_bc * bc,uint16_t type)269 static unsigned typesize(const struct cli_bc *bc, uint16_t type)
270 {
271 struct cli_bc_type *ty;
272 unsigned j;
273
274 type &= 0x7fff;
275 if (!type)
276 return 0;
277 if (type <= 8)
278 return 1;
279 if (type <= 16)
280 return 2;
281 if (type <= 32)
282 return 4;
283 if (type <= 64)
284 return 8;
285 ty = &bc->types[type - 65];
286 if (ty->size)
287 return ty->size;
288 switch (ty->kind) {
289 case 2:
290 case 3:
291 for (j = 0; j < ty->numElements; j++)
292 ty->size += typesize(bc, ty->containedTypes[j]);
293 break;
294 case 4:
295 ty->size = ty->numElements * typesize(bc, ty->containedTypes[0]);
296 break;
297 default:
298 break;
299 }
300 if (!ty->size && ty->kind != DFunctionType) {
301 cli_warnmsg("type %d size is 0\n", type - 65);
302 }
303 return ty->size;
304 }
305
typealign(const struct cli_bc * bc,uint16_t type)306 static unsigned typealign(const struct cli_bc *bc, uint16_t type)
307 {
308 type &= 0x7fff;
309 if (type <= 64) {
310 unsigned size = typesize(bc, type);
311 return size ? size : 1;
312 }
313 return bc->types[type - 65].align;
314 }
315
cli_bytecode_context_setfuncid(struct cli_bc_ctx * ctx,const struct cli_bc * bc,unsigned funcid)316 int cli_bytecode_context_setfuncid(struct cli_bc_ctx *ctx, const struct cli_bc *bc, unsigned funcid)
317 {
318 unsigned i, s = 0;
319 const struct cli_bc_func *func;
320 if (funcid >= bc->num_func) {
321 cli_errmsg("bytecode: function ID doesn't exist: %u\n", funcid);
322 return CL_EARG;
323 }
324 func = ctx->func = &bc->funcs[funcid];
325 ctx->bc = bc;
326 ctx->numParams = func->numArgs;
327 ctx->funcid = funcid;
328 if (func->numArgs) {
329 ctx->operands = cli_malloc(sizeof(*ctx->operands) * func->numArgs);
330 if (!ctx->operands) {
331 cli_errmsg("bytecode: error allocating memory for parameters\n");
332 return CL_EMEM;
333 }
334 ctx->opsizes = cli_malloc(sizeof(*ctx->opsizes) * func->numArgs);
335 if (!ctx->opsizes) {
336 cli_errmsg("bytecode: error allocating memory for opsizes\n");
337 return CL_EMEM;
338 }
339 for (i = 0; i < func->numArgs; i++) {
340 unsigned al = typealign(bc, func->types[i]);
341 s = (s + al - 1) & ~(al - 1);
342 ctx->operands[i] = s;
343 s += ctx->opsizes[i] = typesize(bc, func->types[i]);
344 }
345 }
346 s += 8; /* return value */
347 ctx->bytes = s;
348 ctx->values = cli_malloc(s);
349 if (!ctx->values) {
350 cli_errmsg("bytecode: error allocating memory for parameters\n");
351 return CL_EMEM;
352 }
353 return CL_SUCCESS;
354 }
355
/* Returns 1 when the type id lies in 1..64 (the ids typesize() treats as
 * integer widths), 0 otherwise. */
static inline int type_isint(uint16_t type)
{
    if (type == 0 || type > 64)
        return 0;
    return 1;
}
360
cli_bytecode_context_setparam_int(struct cli_bc_ctx * ctx,unsigned i,uint64_t c)361 int cli_bytecode_context_setparam_int(struct cli_bc_ctx *ctx, unsigned i, uint64_t c)
362 {
363 if (i >= ctx->numParams) {
364 cli_errmsg("bytecode: param index out of bounds: %u\n", i);
365 return CL_EARG;
366 }
367 if (!type_isint(ctx->func->types[i])) {
368 cli_errmsg("bytecode: parameter type mismatch\n");
369 return CL_EARG;
370 }
371 switch (ctx->opsizes[i]) {
372 case 1:
373 ctx->values[ctx->operands[i]] = c;
374 break;
375 case 2:
376 *(uint16_t *)&ctx->values[ctx->operands[i]] = c;
377 break;
378 case 4:
379 *(uint32_t *)&ctx->values[ctx->operands[i]] = c;
380 break;
381 case 8:
382 *(uint64_t *)&ctx->values[ctx->operands[i]] = c;
383 break;
384 }
385 return CL_SUCCESS;
386 }
387
cli_bytecode_context_setparam_ptr(struct cli_bc_ctx * ctx,unsigned i,void * data,unsigned datalen)388 int cli_bytecode_context_setparam_ptr(struct cli_bc_ctx *ctx, unsigned i, void *data, unsigned datalen)
389 {
390 UNUSEDPARAM(ctx);
391 UNUSEDPARAM(i);
392 UNUSEDPARAM(data);
393 UNUSEDPARAM(datalen);
394 cli_errmsg("Pointer parameters are not implemented yet!\n");
395 return CL_EARG;
396 }
397
/*
 * Decode a variable-length number from p starting at *off.
 *
 * Encoding: one byte 0x60 + n (n = 0..16) giving the digit count, followed by
 * n bytes of the form 0x6X, each carrying one 4-bit digit, least significant
 * digit first.
 *
 * On success *off is moved past the number and the value is returned. On a
 * malformed or truncated number an error is logged, *ok is set to 0, *off is
 * left unchanged and 0 is returned. *ok is never set back to 1 here.
 */
static inline uint64_t readNumber(const unsigned char *p, unsigned *off, unsigned len, char *ok)
{
    const unsigned start   = *off;
    const unsigned tag     = p[start];
    const unsigned ndigits = tag - 0x60; /* wraps to a huge value if tag < 0x60 */
    unsigned end, pos, bits;
    uint64_t result = 0;

    if (ndigits > 0x10) {
        cli_errmsg("Invalid number type: %c\n", tag);
        *ok = 0;
        return 0;
    }

    end = start + ndigits + 1;
    if (end > len) {
        cli_errmsg("End of line encountered while reading number\n");
        *ok = 0;
        return 0;
    }

    /* with ndigits == 0 the loop body never runs and the result stays 0 */
    for (pos = start + 1, bits = 0; pos < end; pos++, bits += 4) {
        uint64_t digit = p[pos];

        if (UNLIKELY((digit & 0xf0) != 0x60)) {
            cli_errmsg("Invalid number part: %c\n", (char)digit);
            *ok = 0;
            return 0;
        }
        result |= (digit & 0xf) << bits;
    }

    *off = end;
    return result;
}
436
readFuncID(struct cli_bc * bc,unsigned char * p,unsigned * off,unsigned len,char * ok)437 static inline funcid_t readFuncID(struct cli_bc *bc, unsigned char *p,
438 unsigned *off, unsigned len, char *ok)
439 {
440 funcid_t id = readNumber(p, off, len, ok) - 1;
441 if (*ok && id >= bc->num_func) {
442 cli_errmsg("Called function out of range: %u >= %u\n", id, bc->num_func);
443 *ok = 0;
444 return ~0;
445 }
446 return id;
447 }
448
readAPIFuncID(struct cli_bc * bc,unsigned char * p,unsigned * off,unsigned len,char * ok)449 static inline funcid_t readAPIFuncID(struct cli_bc *bc, unsigned char *p,
450 unsigned *off, unsigned len, char *ok)
451 {
452 funcid_t id = readNumber(p, off, len, ok) - 1;
453 if (*ok && !cli_bitset_test(bc->uses_apis, id)) {
454 cli_errmsg("Called undeclared API function: %u\n", id);
455 *ok = 0;
456 return ~0;
457 }
458 return id;
459 }
460
/*
 * Decode a fixed-width number: exactly `width` bytes of the form 0x6X, each
 * carrying one 4-bit digit, least significant digit first.
 *
 * On success *off is advanced by `width` and the value is returned. If the
 * digits would run past `len` or a byte is not 0x6X, an error is logged,
 * *ok is set to 0, *off is left unchanged and 0 is returned.
 */
static inline unsigned readFixedNumber(const unsigned char *p, unsigned *off,
                                       unsigned len, char *ok, unsigned width)
{
    const unsigned end = *off + width;
    unsigned pos, bits = 0, result = 0;

    if (end > len) {
        cli_errmsg("Newline encountered while reading number\n");
        *ok = 0;
        return 0;
    }

    for (pos = *off; pos < end; pos++) {
        unsigned digit = p[pos];

        if (UNLIKELY((digit & 0xf0) != 0x60)) {
            cli_errmsg("Invalid number part: %c\n", digit);
            *ok = 0;
            return 0;
        }
        result |= (digit & 0xf) << bits;
        bits += 4;
    }

    *off = end;
    return result;
}
486
readOperand(struct cli_bc_func * func,unsigned char * p,unsigned * off,unsigned len,char * ok)487 static inline operand_t readOperand(struct cli_bc_func *func, unsigned char *p,
488 unsigned *off, unsigned len, char *ok)
489 {
490 uint64_t v;
491 if ((p[*off] & 0xf0) == 0x40 || p[*off] == 0x50) {
492 uint64_t *dest;
493 uint16_t ty;
494 p[*off] |= 0x20;
495 /* TODO: unique constants */
496 func->constants = cli_realloc2(func->constants, (func->numConstants + 1) * sizeof(*func->constants));
497 if (!func->constants) {
498 *ok = 0;
499 return MAX_OP;
500 }
501 v = readNumber(p, off, len, ok);
502 dest = &func->constants[func->numConstants];
503 /* Write the constant to the correct place according to its type.
504 * This is needed on big-endian machines, because constants are always
505 * read as u64, but accessed as one of these types: u8, u16, u32, u64 */
506 *dest = 0;
507 ty = 8 * readFixedNumber(p, off, len, ok, 1);
508 if (!ty) {
509 /* This is a global variable */
510 return 0x80000000 | v;
511 }
512 if (ty <= 8)
513 *(uint8_t *)dest = v;
514 else if (ty <= 16)
515 *(uint16_t *)dest = v;
516 else if (ty <= 32)
517 *(uint32_t *)dest = v;
518 else
519 *dest = v;
520 return func->numValues + func->numConstants++;
521 }
522 v = readNumber(p, off, len, ok);
523 if (!*ok)
524 return MAX_OP;
525 if (v >= func->numValues) {
526 cli_errmsg("Operand index exceeds bounds: %u >= %u!\n", (unsigned)v, (unsigned)func->numValues);
527 *ok = 0;
528 return MAX_OP;
529 }
530 return v;
531 }
532
/*
 * Decode a data blob from p starting at *off.
 *
 * Encoding: a '|' marker, a number giving the byte count l, then 2 * l bytes
 * of the form 0x6X; each pair yields one output byte, low nibble first.
 *
 * Returns a newly allocated buffer of l bytes (the caller frees it), stores l
 * in *datalen and moves *off past the data. Returns NULL with *ok untouched
 * for an empty blob (l == 0). Returns NULL with *ok == 0 when the marker is
 * missing, the length cannot be read (or *ok was already 0), the data runs
 * past `len`, a digit is malformed or allocation fails.
 */
static inline char *readData(const unsigned char *p, unsigned *off, unsigned len, char *ok, unsigned *datalen)
{
    unsigned char *dat, *q;
    unsigned l, newoff, i;

    if (p[*off] != '|') {
        cli_errmsg("Data start marker missing: %c\n", p[*off]);
        *ok = 0;
        return NULL;
    }
    (*off)++;
    l = readNumber(p, off, len, ok);
    /* test the flag itself, not the (never NULL) pointer to it */
    if (!l || !*ok) {
        *datalen = l;
        return NULL;
    }
    /* readNumber succeeded, so *off <= len here. Comparing against the
     * remaining space avoids the unsigned wrap-around that `*off + 2 * l`
     * could suffer for a huge declared length. */
    if (l > (len - *off) / 2) {
        cli_errmsg("Line ended while reading data\n");
        *ok = 0;
        return NULL;
    }
    newoff = *off + 2 * l;
    dat    = cli_malloc(l);
    if (!dat) {
        cli_errmsg("Cannot allocate memory for data\n");
        *ok = 0;
        return NULL;
    }
    q = dat;
    for (i = *off; i < newoff; i += 2) {
        const unsigned char v0 = p[i];
        const unsigned char v1 = p[i + 1];
        if (UNLIKELY((v0 & 0xf0) != 0x60 || (v1 & 0xf0) != 0x60)) {
            cli_errmsg("Invalid data part: %c%c\n", v0, v1);
            *ok = 0;
            free(dat);
            return NULL;
        }
        *q++ = (v0 & 0xf) | ((v1 & 0xf) << 4);
    }
    *off     = newoff;
    *datalen = l;
    return (char *)dat;
}
576
/*
 * Read a data blob with readData() and require it to be a NUL-terminated
 * string. A non-empty blob whose last byte is not '\0' is rejected: it is
 * logged (after forcing termination for the log output), freed, *ok is
 * cleared and NULL is returned. Otherwise the result of readData() is
 * returned unchanged; the caller frees it.
 */
static inline char *readString(const unsigned char *p, unsigned *off, unsigned len, char *ok)
{
    unsigned nbytes = 0;
    char *text      = readData(p, off, len, ok, &nbytes);

    if (!*ok || nbytes == 0 || text[nbytes - 1] == '\0')
        return text;

    text[nbytes - 1] = '\0';
    cli_errmsg("bytecode: string missing \\0 terminator: %s\n", text);
    free(text);
    *ok = 0;
    return NULL;
}
590
parseHeader(struct cli_bc * bc,unsigned char * buffer,unsigned * linelength)591 static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linelength)
592 {
593 uint64_t magic1;
594 unsigned magic2;
595 char ok = 1;
596 unsigned offset, len, flevel;
597 char *pos;
598
599 if (strncmp((const char *)buffer, BC_HEADER, sizeof(BC_HEADER) - 1)) {
600 cli_errmsg("Missing file magic in bytecode");
601 return CL_EMALFDB;
602 }
603 offset = sizeof(BC_HEADER) - 1;
604 len = strlen((const char *)buffer);
605 bc->metadata.formatlevel = readNumber(buffer, &offset, len, &ok);
606 if (!ok) {
607 cli_errmsg("Unable to parse (format) functionality level in bytecode header\n");
608 return CL_EMALFDB;
609 }
610 /* we support 2 bytecode formats */
611 if (bc->metadata.formatlevel != BC_FORMAT_096 &&
612 bc->metadata.formatlevel != BC_FORMAT_LEVEL) {
613 cli_dbgmsg("Skipping bytecode with (format) functionality level: %u (current %u)\n",
614 bc->metadata.formatlevel, BC_FORMAT_LEVEL);
615 return CL_BREAK;
616 }
617 /* Optimistic parsing, check for error only at the end.*/
618 bc->metadata.timestamp = readNumber(buffer, &offset, len, &ok);
619 bc->metadata.sigmaker = readString(buffer, &offset, len, &ok);
620 bc->metadata.targetExclude = readNumber(buffer, &offset, len, &ok);
621 bc->kind = readNumber(buffer, &offset, len, &ok);
622 bc->metadata.minfunc = readNumber(buffer, &offset, len, &ok);
623 bc->metadata.maxfunc = readNumber(buffer, &offset, len, &ok);
624 flevel = cl_retflevel();
625 /* in 0.96 these 2 fields are unused / zero, in post 0.96 these mean
626 * min/max flevel.
627 * So 0 for min/max means no min/max
628 * Note that post 0.96 bytecode/bytecode lsig needs format 7, because
629 * 0.96 doesn't check lsig functionality level.
630 */
631 if ((bc->metadata.minfunc && bc->metadata.minfunc > flevel) ||
632 (bc->metadata.maxfunc && bc->metadata.maxfunc < flevel)) {
633 cli_dbgmsg("Skipping bytecode with (engine) functionality level %u-%u (current %u)\n",
634 bc->metadata.minfunc, bc->metadata.maxfunc, flevel);
635 return CL_BREAK;
636 }
637 bc->metadata.maxresource = readNumber(buffer, &offset, len, &ok);
638 bc->metadata.compiler = readString(buffer, &offset, len, &ok);
639 bc->num_types = readNumber(buffer, &offset, len, &ok);
640 bc->num_func = readNumber(buffer, &offset, len, &ok);
641 bc->state = bc_loaded;
642 bc->uses_apis = NULL;
643 bc->dbgnodes = NULL;
644 bc->dbgnode_cnt = 0;
645 if (!ok) {
646 cli_errmsg("Invalid bytecode header at %u\n", offset);
647 return CL_EMALFDB;
648 }
649 magic1 = readNumber(buffer, &offset, len, &ok);
650 magic2 = readFixedNumber(buffer, &offset, len, &ok, 2);
651 if (!ok || magic1 != 0x53e5493e9f3d1c30ull || magic2 != 42) {
652 unsigned long m0 = magic1 >> 32;
653 unsigned long m1 = magic1;
654 cli_errmsg("Magic numbers don't match: %lx%lx, %u\n", m0, m1, magic2);
655 return CL_EMALFDB;
656 }
657 if (buffer[offset] != ':') {
658 cli_errmsg("Expected : but found: %c\n", buffer[offset]);
659 return CL_EMALFDB;
660 }
661 offset++;
662 *linelength = strtol((const char *)buffer + offset, &pos, 10);
663 if (*pos != '\0') {
664 cli_errmsg("Invalid number: %s\n", buffer + offset);
665 return CL_EMALFDB;
666 }
667
668 bc->funcs = cli_calloc(bc->num_func, sizeof(*bc->funcs));
669 if (!bc->funcs) {
670 cli_errmsg("Out of memory allocating %u functions\n", bc->num_func);
671 return CL_EMEM;
672 }
673 bc->types = cli_calloc(bc->num_types, sizeof(*bc->types));
674 if (!bc->types) {
675 cli_errmsg("Out of memory allocating %u types\n", bc->num_types);
676 return CL_EMEM;
677 }
678 return CL_SUCCESS;
679 }
680
parseLSig(struct cli_bc * bc,char * buffer)681 static int parseLSig(struct cli_bc *bc, char *buffer)
682 {
683 const char *prefix;
684 char *vnames, *vend = strchr(buffer, ';');
685 if (vend) {
686 bc->lsig = cli_strdup(buffer);
687 *vend++ = '\0';
688 prefix = buffer;
689 vnames = strchr(vend, '{');
690 } else {
691 /* Not a logical signature, but we still have a virusname */
692 bc->hook_name = cli_strdup(buffer);
693 bc->lsig = NULL;
694 }
695
696 return CL_SUCCESS;
697 }
698
readTypeID(struct cli_bc * bc,unsigned char * buffer,unsigned * offset,unsigned len,char * ok)699 static uint16_t readTypeID(struct cli_bc *bc, unsigned char *buffer,
700 unsigned *offset, unsigned len, char *ok)
701 {
702 uint64_t t = readNumber(buffer, offset, len, ok);
703 if (!ok)
704 return ~0;
705 if (t >= bc->num_types + bc->start_tid) {
706 cli_errmsg("Invalid type id: %llu\n", (unsigned long long)t);
707 *ok = 0;
708 return ~0;
709 }
710 return t;
711 }
712
parseType(struct cli_bc * bc,struct cli_bc_type * ty,unsigned char * buffer,unsigned * off,unsigned len,char * ok)713 static void parseType(struct cli_bc *bc, struct cli_bc_type *ty,
714 unsigned char *buffer, unsigned *off, unsigned len,
715 char *ok)
716 {
717 unsigned j;
718
719 ty->numElements = readNumber(buffer, off, len, ok);
720 if (!*ok) {
721 cli_errmsg("Error parsing type\n");
722 *ok = 0;
723 return;
724 }
725 ty->containedTypes = cli_malloc(sizeof(*ty->containedTypes) * ty->numElements);
726 if (!ty->containedTypes) {
727 cli_errmsg("Out of memory allocating %u types\n", ty->numElements);
728 *ok = 0;
729 return;
730 }
731 for (j = 0; j < ty->numElements; j++) {
732 ty->containedTypes[j] = readTypeID(bc, buffer, off, len, ok);
733 }
734 }
735
736 static uint16_t containedTy[] = {8, 16, 32, 64};
737
738 #define NUM_STATIC_TYPES 4
add_static_types(struct cli_bc * bc)739 static void add_static_types(struct cli_bc *bc)
740 {
741 unsigned i;
742 for (i = 0; i < NUM_STATIC_TYPES; i++) {
743 bc->types[i].kind = DPointerType;
744 bc->types[i].numElements = 1;
745 bc->types[i].containedTypes = &containedTy[i];
746 bc->types[i].size = bc->types[i].align = 8;
747 }
748 }
749
parseTypes(struct cli_bc * bc,unsigned char * buffer)750 static int parseTypes(struct cli_bc *bc, unsigned char *buffer)
751 {
752 unsigned i, offset = 1, len = strlen((const char *)buffer);
753 char ok = 1;
754
755 if (buffer[0] != 'T') {
756 cli_errmsg("Invalid function types header: %c\n", buffer[0]);
757 return CL_EMALFDB;
758 }
759 bc->start_tid = readFixedNumber(buffer, &offset, len, &ok, 2);
760 if (bc->start_tid != BC_START_TID) {
761 cli_warnmsg("Type start id mismatch: %u != %u\n", bc->start_tid,
762 BC_START_TID);
763 return CL_BREAK;
764 }
765 add_static_types(bc);
766 for (i = (BC_START_TID - 65); i < bc->num_types - 1; i++) {
767 struct cli_bc_type *ty = &bc->types[i];
768 uint8_t t = readFixedNumber(buffer, &offset, len, &ok, 1);
769 if (!ok) {
770 cli_errmsg("Error reading type kind\n");
771 return CL_EMALFDB;
772 }
773 switch (t) {
774 case 1:
775 ty->kind = DFunctionType;
776 ty->size = ty->align = sizeof(void *);
777 parseType(bc, ty, buffer, &offset, len, &ok);
778 if (!ok) {
779 cli_errmsg("Error parsing type %u\n", i);
780 return CL_EMALFDB;
781 }
782 if (!ty->numElements) {
783 cli_errmsg("Function with no return type? %u\n", i);
784 return CL_EMALFDB;
785 }
786 break;
787 case 2:
788 case 3:
789 ty->kind = (t == 2) ? DPackedStructType : DStructType;
790 ty->size = ty->align = 0; /* TODO:calculate size/align of structs */
791 ty->align = 8;
792 parseType(bc, ty, buffer, &offset, len, &ok);
793 if (!ok) {
794 cli_errmsg("Error parsing type %u\n", i);
795 return CL_EMALFDB;
796 }
797 break;
798 case 4:
799 ty->kind = DArrayType;
800 /* number of elements of array, not subtypes! */
801 ty->numElements = readNumber(buffer, &offset, len, &ok);
802 if (!ok) {
803 cli_errmsg("Error parsing type %u\n", i);
804 return CL_EMALFDB;
805 }
806 /* fall-through */
807 case 5:
808 if (t == 5) {
809 ty->kind = DPointerType;
810 ty->numElements = 1;
811 }
812 ty->containedTypes = cli_malloc(sizeof(*ty->containedTypes));
813 if (!ty->containedTypes) {
814 cli_errmsg("Out of memory allocating containedType\n");
815 return CL_EMALFDB;
816 }
817 ty->containedTypes[0] = readTypeID(bc, buffer, &offset, len, &ok);
818 if (!ok) {
819 cli_errmsg("Error parsing type %u\n", i);
820 return CL_EMALFDB;
821 }
822 if (t == 5) {
823 /* for interpreter, pointers 64-bit there */
824 ty->size = ty->align = 8;
825 } else {
826 ty->size = ty->numElements * typesize(bc, ty->containedTypes[0]);
827 ty->align = typealign(bc, ty->containedTypes[0]);
828 }
829 break;
830 default:
831 cli_errmsg("Invalid type kind: %u\n", t);
832 return CL_EMALFDB;
833 }
834 }
835 for (i = (BC_START_TID - 65); i < bc->num_types - 1; i++) {
836 struct cli_bc_type *ty = &bc->types[i];
837 if (ty->kind == DArrayType) {
838 ty->size = ty->numElements * typesize(bc, ty->containedTypes[0]);
839 ty->align = typealign(bc, ty->containedTypes[0]);
840 }
841 }
842 return CL_SUCCESS;
843 }
844
845 /* checks whether the type described by tid is the same as the one described by
846 * apitid. */
types_equal(const struct cli_bc * bc,uint16_t * apity2ty,uint16_t tid,uint16_t apitid)847 static int types_equal(const struct cli_bc *bc, uint16_t *apity2ty, uint16_t tid, uint16_t apitid)
848 {
849 unsigned i;
850 const struct cli_bc_type *ty = &bc->types[tid - 65];
851 const struct cli_bc_type *apity = &cli_apicall_types[apitid];
852 /* If we've already verified type equality, return.
853 * Since we need to check equality of recursive types, we assume types are
854 * equal while checking equality of contained types, unless proven
855 * otherwise. */
856 if (apity2ty[apitid] == tid + 1)
857 return 1;
858 apity2ty[apitid] = tid + 1;
859
860 if (ty->kind != apity->kind) {
861 cli_dbgmsg("bytecode: type kind mismatch: %u != %u\n", ty->kind, apity->kind);
862 return 0;
863 }
864 if (ty->numElements != apity->numElements) {
865 cli_dbgmsg("bytecode: type numElements mismatch: %u != %u\n", ty->numElements, apity->numElements);
866 return 0;
867 }
868 for (i = 0; i < ty->numElements; i++) {
869 if (apity->containedTypes[i] < BC_START_TID) {
870 if (ty->containedTypes[i] != apity->containedTypes[i]) {
871 cli_dbgmsg("bytecode: contained type mismatch: %u != %u\n",
872 ty->containedTypes[i], apity->containedTypes[i]);
873 return 0;
874 }
875 } else if (!types_equal(bc, apity2ty, ty->containedTypes[i], apity->containedTypes[i] - BC_START_TID))
876 return 0;
877 if (ty->kind == DArrayType)
878 break; /* validated the contained type already */
879 }
880 return 1;
881 }
882
parseApis(struct cli_bc * bc,unsigned char * buffer)883 static int parseApis(struct cli_bc *bc, unsigned char *buffer)
884 {
885 unsigned i, offset = 1, len = strlen((const char *)buffer), maxapi, calls;
886 char ok = 1;
887 uint16_t *apity2ty; /*map of api type to current bytecode type ID */
888
889 if (buffer[0] != 'E') {
890 cli_errmsg("bytecode: Invalid api header: %c\n", buffer[0]);
891 return CL_EMALFDB;
892 }
893
894 maxapi = readNumber(buffer, &offset, len, &ok);
895 if (!ok)
896 return CL_EMALFDB;
897 if (maxapi > cli_apicall_maxapi) {
898 cli_dbgmsg("bytecode using API %u, but highest API known to libclamav is %u, skipping\n", maxapi, cli_apicall_maxapi);
899 return CL_BREAK;
900 }
901 calls = readNumber(buffer, &offset, len, &ok);
902 if (!ok)
903 return CL_EMALFDB;
904 if (calls > maxapi) {
905 cli_errmsg("bytecode: attempting to describe more APIs than max: %u > %u\n", calls, maxapi);
906 return CL_EMALFDB;
907 }
908 bc->uses_apis = cli_bitset_init();
909 if (!bc->uses_apis) {
910 cli_errmsg("Out of memory allocating apis bitset\n");
911 return CL_EMEM;
912 }
913 apity2ty = cli_calloc(cli_apicall_maxtypes, sizeof(*cli_apicall_types));
914 if (!apity2ty) {
915 cli_errmsg("Out of memory allocating apity2ty\n");
916 return CL_EMEM;
917 }
918 for (i = 0; i < calls; i++) {
919 unsigned id = readNumber(buffer, &offset, len, &ok);
920 uint16_t tid = readTypeID(bc, buffer, &offset, len, &ok);
921 char *name = readString(buffer, &offset, len, &ok);
922
923 /* validate APIcall prototype */
924 if (id > maxapi) {
925 cli_errmsg("bytecode: API id %u out of range, max %u\n", id, maxapi);
926 ok = 0;
927 }
928 /* API ids start from 1 */
929 id--;
930 if (ok && name && strcmp(cli_apicalls[id].name, name)) {
931 cli_errmsg("bytecode: API %u name mismatch: %s expected %s\n", id, name, cli_apicalls[id].name);
932 ok = 0;
933 }
934 if (ok && !types_equal(bc, apity2ty, tid, cli_apicalls[id].type)) {
935 cli_errmsg("bytecode: API %u prototype doesn't match\n", id);
936 ok = 0;
937 }
938 /* don't need the name anymore */
939 free(name);
940 if (!ok) {
941 free(apity2ty); /* free temporary map */
942 return CL_EMALFDB;
943 }
944
945 /* APIcall is valid */
946 cli_bitset_set(bc->uses_apis, id);
947 }
948 free(apity2ty); /* free temporary map */
949 cli_dbgmsg("bytecode: Parsed %u APIcalls, maxapi %u\n", calls, maxapi);
950 return CL_SUCCESS;
951 }
952
type_components(struct cli_bc * bc,uint16_t id,char * ok)953 static uint16_t type_components(struct cli_bc *bc, uint16_t id, char *ok)
954 {
955 unsigned i, sum = 0;
956 const struct cli_bc_type *ty;
957 if (id <= 64)
958 return 1;
959 ty = &bc->types[id - 65];
960 /* TODO: protect against recursive types */
961 switch (ty->kind) {
962 case DFunctionType:
963 cli_errmsg("bytecode: function type not accepted for constant: %u\n", id);
964 /* don't accept functions as constant initializers */
965 *ok = 0;
966 return 0;
967 case DPointerType:
968 return 2;
969 case DStructType:
970 case DPackedStructType:
971 for (i = 0; i < ty->numElements; i++) {
972 sum += type_components(bc, ty->containedTypes[i], ok);
973 }
974 return sum;
975 case DArrayType:
976 return type_components(bc, ty->containedTypes[0], ok) * ty->numElements;
977 default:
978 *ok = 0;
979 return 0;
980 }
981 }
982
readConstant(struct cli_bc * bc,unsigned i,unsigned comp,unsigned char * buffer,unsigned * offset,unsigned len,char * ok)983 static void readConstant(struct cli_bc *bc, unsigned i, unsigned comp,
984 unsigned char *buffer, unsigned *offset,
985 unsigned len, char *ok)
986 {
987 unsigned j = 0;
988 if (*ok && buffer[*offset] == 0x40 &&
989 buffer[*offset + 1] == 0x60) {
990 /* zero initializer */
991 memset(bc->globals[i], 0, sizeof(*bc->globals[0]) * comp);
992 (*offset) += 2;
993 return;
994 }
995 while (*ok && buffer[*offset] != 0x60) {
996 if (j >= comp) {
997 cli_errmsg("bytecode: constant has too many subcomponents, expected %u\n", comp);
998 *ok = 0;
999 return;
1000 }
1001 buffer[*offset] |= 0x20;
1002 bc->globals[i][j++] = readNumber(buffer, offset, len, ok);
1003 }
1004 if (*ok && j != comp) {
1005 cli_errmsg("bytecode: constant has too few subcomponents: %u < %u\n", j, comp);
1006 *ok = 0;
1007 }
1008 (*offset)++;
1009 }
1010
1011 /* parse constant globals with constant initializers */
parseGlobals(struct cli_bc * bc,unsigned char * buffer)1012 static int parseGlobals(struct cli_bc *bc, unsigned char *buffer)
1013 {
1014 unsigned i, offset = 1, len = strlen((const char *)buffer), numglobals;
1015 unsigned maxglobal;
1016 char ok = 1;
1017
1018 if (buffer[0] != 'G') {
1019 cli_errmsg("bytecode: Invalid globals header: %c\n", buffer[0]);
1020 return CL_EMALFDB;
1021 }
1022 maxglobal = readNumber(buffer, &offset, len, &ok);
1023 if (maxglobal > cli_apicall_maxglobal) {
1024 cli_dbgmsg("bytecode using global %u, but highest global known to libclamav is %u, skipping\n", maxglobal, cli_apicall_maxglobal);
1025 return CL_BREAK;
1026 }
1027 numglobals = readNumber(buffer, &offset, len, &ok);
1028 bc->globals = cli_calloc(numglobals, sizeof(*bc->globals));
1029 if (!bc->globals) {
1030 cli_errmsg("bytecode: OOM allocating memory for %u globals\n", numglobals);
1031 return CL_EMEM;
1032 }
1033 bc->globaltys = cli_calloc(numglobals, sizeof(*bc->globaltys));
1034 if (!bc->globaltys) {
1035 cli_errmsg("bytecode: OOM allocating memory for %u global types\n", numglobals);
1036 return CL_EMEM;
1037 }
1038 bc->num_globals = numglobals;
1039 if (!ok)
1040 return CL_EMALFDB;
1041 for (i = 0; i < numglobals; i++) {
1042 unsigned comp;
1043 bc->globaltys[i] = readTypeID(bc, buffer, &offset, len, &ok);
1044 comp = type_components(bc, bc->globaltys[i], &ok);
1045 if (!ok)
1046 return CL_EMALFDB;
1047 bc->globals[i] = cli_malloc(sizeof(*bc->globals[0]) * comp);
1048 if (!bc->globals[i])
1049 return CL_EMEM;
1050 readConstant(bc, i, comp, buffer, &offset, len, &ok);
1051 }
1052 if (!ok)
1053 return CL_EMALFDB;
1054 if (offset != len) {
1055 cli_errmsg("Trailing garbage in globals: %d extra bytes\n",
1056 len - offset);
1057 return CL_EMALFDB;
1058 }
1059 return CL_SUCCESS;
1060 }
1061
parseMD(struct cli_bc * bc,unsigned char * buffer)1062 static int parseMD(struct cli_bc *bc, unsigned char *buffer)
1063 {
1064 unsigned offset = 1, len = strlen((const char *)buffer);
1065 unsigned numMD, i, b;
1066 char ok = 1;
1067 if (buffer[0] != 'D')
1068 return CL_EMALFDB;
1069 numMD = readNumber(buffer, &offset, len, &ok);
1070 if (!ok) {
1071 cli_errmsg("Unable to parse number of MD nodes\n");
1072 return CL_EMALFDB;
1073 }
1074 b = bc->dbgnode_cnt;
1075 bc->dbgnode_cnt += numMD;
1076 bc->dbgnodes = cli_realloc(bc->dbgnodes, bc->dbgnode_cnt * sizeof(*bc->dbgnodes));
1077 if (!bc->dbgnodes)
1078 return CL_EMEM;
1079 for (i = 0; i < numMD; i++) {
1080 unsigned j;
1081 struct cli_bc_dbgnode_element *elts;
1082 unsigned el = readNumber(buffer, &offset, len, &ok);
1083 if (!ok) {
1084 cli_errmsg("Unable to parse number of elements\n");
1085 return CL_EMALFDB;
1086 }
1087 bc->dbgnodes[b + i].numelements = el;
1088 bc->dbgnodes[b + i].elements = elts = cli_calloc(el, sizeof(*elts));
1089 if (!elts)
1090 return CL_EMEM;
1091 for (j = 0; j < el; j++) {
1092 if (buffer[offset] == '|') {
1093 elts[j].string = readData(buffer, &offset, len, &ok, &elts[j].len);
1094 if (!ok)
1095 return CL_EMALFDB;
1096 } else {
1097 elts[j].len = readNumber(buffer, &offset, len, &ok);
1098 if (!ok)
1099 return CL_EMALFDB;
1100 if (elts[j].len) {
1101 elts[j].constant = readNumber(buffer, &offset, len, &ok);
1102 } else
1103 elts[j].nodeid = readNumber(buffer, &offset, len, &ok);
1104 if (!ok)
1105 return CL_EMALFDB;
1106 }
1107 }
1108 }
1109 cli_dbgmsg("bytecode: Parsed %u nodes total\n", bc->dbgnode_cnt);
1110 return CL_SUCCESS;
1111 }
1112
parseFunctionHeader(struct cli_bc * bc,unsigned fn,unsigned char * buffer)1113 static int parseFunctionHeader(struct cli_bc *bc, unsigned fn, unsigned char *buffer)
1114 {
1115 char ok = 1;
1116 unsigned offset, len, all_locals = 0, i;
1117 struct cli_bc_func *func;
1118
1119 if (fn >= bc->num_func) {
1120 cli_errmsg("Found more functions than declared: %u >= %u\n", fn,
1121 bc->num_func);
1122 return CL_EMALFDB;
1123 }
1124 func = &bc->funcs[fn];
1125 len = strlen((const char *)buffer);
1126
1127 if (buffer[0] != 'A') {
1128 cli_errmsg("Invalid function arguments header: %c\n", buffer[0]);
1129 return CL_EMALFDB;
1130 }
1131 offset = 1;
1132 func->numArgs = readFixedNumber(buffer, &offset, len, &ok, 1);
1133 func->returnType = readTypeID(bc, buffer, &offset, len, &ok);
1134 if (buffer[offset] != 'L') {
1135 cli_errmsg("Invalid function locals header: %c\n", buffer[offset]);
1136 return CL_EMALFDB;
1137 }
1138 offset++;
1139 func->numLocals = readNumber(buffer, &offset, len, &ok);
1140 if (!ok) {
1141 cli_errmsg("Invalid number of arguments/locals\n");
1142 return CL_EMALFDB;
1143 }
1144 all_locals = func->numArgs + func->numLocals;
1145 if (!all_locals) {
1146 func->types = NULL;
1147 } else {
1148 func->types = cli_calloc(all_locals, sizeof(*func->types));
1149 if (!func->types) {
1150 cli_errmsg("Out of memory allocating function arguments\n");
1151 return CL_EMEM;
1152 }
1153 }
1154 for (i = 0; i < all_locals; i++) {
1155 func->types[i] = readNumber(buffer, &offset, len, &ok);
1156 if (readFixedNumber(buffer, &offset, len, &ok, 1))
1157 func->types[i] |= 0x8000;
1158 }
1159 if (!ok) {
1160 cli_errmsg("Invalid local types\n");
1161 return CL_EMALFDB;
1162 }
1163 if (buffer[offset] != 'F') {
1164 cli_errmsg("Invalid function body header: %c\n", buffer[offset]);
1165 return CL_EMALFDB;
1166 }
1167 offset++;
1168 func->numInsts = readNumber(buffer, &offset, len, &ok);
1169 if (!ok) {
1170 cli_errmsg("Invalid instructions count\n");
1171 return CL_EMALFDB;
1172 }
1173 func->numValues = func->numArgs + func->numLocals;
1174 func->insn_idx = 0;
1175 func->numConstants = 0;
1176 func->allinsts = cli_calloc(func->numInsts, sizeof(*func->allinsts));
1177 if (!func->allinsts) {
1178 cli_errmsg("Out of memory allocating instructions\n");
1179 return CL_EMEM;
1180 }
1181 func->numBB = readNumber(buffer, &offset, len, &ok);
1182 if (!ok) {
1183 cli_errmsg("Invalid basic block count\n");
1184 return CL_EMALFDB;
1185 }
1186 func->BB = cli_calloc(func->numBB, sizeof(*func->BB));
1187 if (!func->BB) {
1188 cli_errmsg("Out of memory allocating basic blocks\n");
1189 return CL_EMEM;
1190 }
1191 return CL_SUCCESS;
1192 }
1193
readBBID(struct cli_bc_func * func,const unsigned char * buffer,unsigned * off,unsigned len,char * ok)1194 static bbid_t readBBID(struct cli_bc_func *func, const unsigned char *buffer, unsigned *off, unsigned len, char *ok)
1195 {
1196 unsigned id = readNumber(buffer, off, len, ok);
1197 if (!id || id >= func->numBB) {
1198 cli_errmsg("Basic block ID out of range: %u\n", id);
1199 *ok = 0;
1200 }
1201 if (!*ok)
1202 return ~0;
1203 return id;
1204 }
1205
1206 /*
1207 static uint16_t get_type(struct cli_bc_func *func, operand_t op)
1208 {
1209 if (op >= func->numValues)
1210 return 64;
1211 return func->types[op];
1212 }*/
/*
 * Return the declared type of operand `op` with the 0x8000 flag bit masked
 * off, or 0 when `op` is not one of the function's arguments/locals.
 */
static int16_t get_optype(const struct cli_bc_func *bcfunc, operand_t op)
{
    if (op < bcfunc->numArgs + bcfunc->numLocals)
        return bcfunc->types[op] & 0x7fff;
    return 0;
}
1219
parseBB(struct cli_bc * bc,unsigned func,unsigned bb,unsigned char * buffer)1220 static int parseBB(struct cli_bc *bc, unsigned func, unsigned bb, unsigned char *buffer)
1221 {
1222 char ok = 1;
1223 unsigned offset, len, i, last = 0;
1224 struct cli_bc_bb *BB;
1225 struct cli_bc_func *bcfunc = &bc->funcs[func];
1226 struct cli_bc_inst inst;
1227
1228 if (bb >= bcfunc->numBB) {
1229 cli_errmsg("Found too many basic blocks\n");
1230 return CL_EMALFDB;
1231 }
1232
1233 BB = &bcfunc->BB[bb];
1234 len = strlen((const char *)buffer);
1235 if (buffer[0] != 'B') {
1236 cli_errmsg("Invalid basic block header: %c\n", buffer[0]);
1237 return CL_EMALFDB;
1238 }
1239 offset = 1;
1240 BB->numInsts = 0;
1241 BB->insts = &bcfunc->allinsts[bcfunc->insn_idx];
1242 while (!last) {
1243 unsigned numOp;
1244 if (buffer[offset] == 'T') {
1245 last = 1;
1246 offset++;
1247 /* terminators are void */
1248 inst.type = 0;
1249 inst.dest = 0;
1250 } else {
1251 inst.type = readNumber(buffer, &offset, len, &ok);
1252 inst.dest = readNumber(buffer, &offset, len, &ok);
1253 }
1254 inst.opcode = readFixedNumber(buffer, &offset, len, &ok, 2);
1255 if (!ok) {
1256 cli_errmsg("Invalid type or operand\n");
1257 return CL_EMALFDB;
1258 }
1259 if (inst.opcode >= OP_BC_INVALID) {
1260 cli_errmsg("Invalid opcode: %u\n", inst.opcode);
1261 return CL_EMALFDB;
1262 }
1263
1264 switch (inst.opcode) {
1265 case OP_BC_JMP:
1266 inst.u.jump = readBBID(bcfunc, buffer, &offset, len, &ok);
1267 break;
1268 case OP_BC_RET:
1269 inst.type = readNumber(buffer, &offset, len, &ok);
1270 inst.u.unaryop = readOperand(bcfunc, buffer, &offset, len, &ok);
1271 break;
1272 case OP_BC_BRANCH:
1273 inst.u.branch.condition = readOperand(bcfunc, buffer, &offset, len, &ok);
1274 inst.u.branch.br_true = readBBID(bcfunc, buffer, &offset, len, &ok);
1275 inst.u.branch.br_false = readBBID(bcfunc, buffer, &offset, len, &ok);
1276 break;
1277 case OP_BC_CALL_API: /* fall-through */
1278 case OP_BC_CALL_DIRECT:
1279 numOp = readFixedNumber(buffer, &offset, len, &ok, 1);
1280 if (ok) {
1281 inst.u.ops.numOps = numOp;
1282 inst.u.ops.opsizes = NULL;
1283 if (!numOp) {
1284 inst.u.ops.ops = NULL;
1285 } else {
1286 inst.u.ops.ops = cli_calloc(numOp, sizeof(*inst.u.ops.ops));
1287 if (!inst.u.ops.ops) {
1288 cli_errmsg("Out of memory allocating operands\n");
1289 return CL_EMEM;
1290 }
1291 }
1292 if (inst.opcode == OP_BC_CALL_DIRECT)
1293 inst.u.ops.funcid = readFuncID(bc, buffer, &offset, len, &ok);
1294 else
1295 inst.u.ops.funcid = readAPIFuncID(bc, buffer, &offset, len, &ok);
1296 for (i = 0; i < numOp; i++) {
1297 inst.u.ops.ops[i] = readOperand(bcfunc, buffer, &offset, len, &ok);
1298 }
1299 }
1300 break;
1301 case OP_BC_ZEXT:
1302 case OP_BC_SEXT:
1303 case OP_BC_TRUNC:
1304 inst.u.cast.source = readOperand(bcfunc, buffer, &offset, len, &ok);
1305 inst.u.cast.mask = bcfunc->types[inst.u.cast.source];
1306 if (inst.u.cast.mask == 1)
1307 inst.u.cast.size = 0;
1308 else if (inst.u.cast.mask <= 8)
1309 inst.u.cast.size = 1;
1310 else if (inst.u.cast.mask <= 16)
1311 inst.u.cast.size = 2;
1312 else if (inst.u.cast.mask <= 32)
1313 inst.u.cast.size = 3;
1314 else if (inst.u.cast.mask <= 64)
1315 inst.u.cast.size = 4;
1316 /* calculate mask */
1317 if (inst.opcode != OP_BC_SEXT)
1318 inst.u.cast.mask = inst.u.cast.mask != 64 ? (1ull << inst.u.cast.mask) - 1 : ~0ull;
1319 break;
1320 case OP_BC_GEP1:
1321 case OP_BC_GEPZ:
1322 inst.u.three[0] = readNumber(buffer, &offset, len, &ok);
1323 inst.u.three[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1324 inst.u.three[2] = readOperand(bcfunc, buffer, &offset, len, &ok);
1325 break;
1326 case OP_BC_GEPN:
1327 numOp = readFixedNumber(buffer, &offset, len, &ok, 1);
1328 if (ok) {
1329 inst.u.ops.numOps = numOp + 2;
1330 inst.u.ops.opsizes = NULL;
1331 inst.u.ops.ops = cli_calloc(numOp + 2, sizeof(*inst.u.ops.ops));
1332 if (!inst.u.ops.ops) {
1333 cli_errmsg("Out of memory allocating operands\n");
1334 return CL_EMEM;
1335 }
1336 inst.u.ops.ops[0] = readNumber(buffer, &offset, len, &ok);
1337 for (i = 1; i < numOp + 2; i++)
1338 inst.u.ops.ops[i] = readOperand(bcfunc, buffer, &offset, len, &ok);
1339 }
1340 break;
1341 case OP_BC_ICMP_EQ:
1342 case OP_BC_ICMP_NE:
1343 case OP_BC_ICMP_UGT:
1344 case OP_BC_ICMP_UGE:
1345 case OP_BC_ICMP_ULT:
1346 case OP_BC_ICMP_ULE:
1347 case OP_BC_ICMP_SGT:
1348 case OP_BC_ICMP_SGE:
1349 case OP_BC_ICMP_SLE:
1350 case OP_BC_ICMP_SLT:
1351 /* instruction type must be correct before readOperand! */
1352 inst.type = readNumber(buffer, &offset, len, &ok);
1353 /* fall-through */
1354 default:
1355 numOp = operand_counts[inst.opcode];
1356 switch (numOp) {
1357 case 0:
1358 break;
1359 case 1:
1360 inst.u.unaryop = readOperand(bcfunc, buffer, &offset, len, &ok);
1361 break;
1362 case 2:
1363 inst.u.binop[0] = readOperand(bcfunc, buffer, &offset, len, &ok);
1364 inst.u.binop[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1365 break;
1366 case 3:
1367 inst.u.three[0] = readOperand(bcfunc, buffer, &offset, len, &ok);
1368 inst.u.three[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1369 inst.u.three[2] = readOperand(bcfunc, buffer, &offset, len, &ok);
1370 break;
1371 default:
1372 cli_errmsg("Opcode %u with too many operands: %u?\n", inst.opcode, numOp);
1373 ok = 0;
1374 break;
1375 }
1376 }
1377 if (inst.opcode == OP_BC_STORE) {
1378 int16_t t = get_optype(bcfunc, inst.u.binop[0]);
1379 if (t)
1380 inst.type = t;
1381 }
1382 if (inst.opcode == OP_BC_COPY)
1383 inst.type = get_optype(bcfunc, inst.u.binop[1]);
1384 if (!ok) {
1385 cli_errmsg("Invalid instructions or operands\n");
1386 return CL_EMALFDB;
1387 }
1388 if (bcfunc->insn_idx + BB->numInsts >= bcfunc->numInsts) {
1389 cli_errmsg("More instructions than declared in total: %u > %u!\n",
1390 bcfunc->insn_idx + BB->numInsts, bcfunc->numInsts);
1391 return CL_EMALFDB;
1392 }
1393 inst.interp_op = inst.opcode * 5;
1394 if (inst.type > 1) {
1395 if (inst.type <= 8)
1396 inst.interp_op += 1;
1397 else if (inst.type <= 16)
1398 inst.interp_op += 2;
1399 else if (inst.type <= 32)
1400 inst.interp_op += 3;
1401 else if (inst.type <= 65)
1402 inst.interp_op += 4;
1403 else {
1404 cli_dbgmsg("unknown inst type: %d\n", inst.type);
1405 }
1406 }
1407 BB->insts[BB->numInsts++] = inst;
1408 }
1409 if (bb + 1 == bc->funcs[func].numBB) {
1410 if (buffer[offset] != 'E') {
1411 cli_errmsg("Missing basicblock terminator, got: %c\n", buffer[offset]);
1412 return CL_EMALFDB;
1413 }
1414 offset++;
1415 }
1416 if (buffer[offset] == 'D') {
1417 uint32_t num;
1418 offset += 3;
1419 if (offset >= len)
1420 return CL_EMALFDB;
1421 num = (uint32_t)readNumber(buffer, &offset, len, &ok);
1422 if (!ok)
1423 return CL_EMALFDB;
1424 if (num != bcfunc->numInsts) {
1425 cli_errmsg("invalid number of dbg nodes, expected: %u, got: %u\n", bcfunc->numInsts, num);
1426 return CL_EMALFDB;
1427 }
1428 bcfunc->dbgnodes = cli_malloc(num * sizeof(*bcfunc->dbgnodes));
1429 if (!bcfunc->dbgnodes) {
1430 cli_errmsg("Unable to allocate memory for dbg nodes: %u\n", num * (uint32_t)sizeof(*bcfunc->dbgnodes));
1431 return CL_EMEM;
1432 }
1433 for (i = 0; (uint32_t)i < num; i++) {
1434 bcfunc->dbgnodes[i] = readNumber(buffer, &offset, len, &ok);
1435 if (!ok)
1436 return CL_EMALFDB;
1437 }
1438 }
1439 if (offset != len) {
1440 cli_errmsg("Trailing garbage in basicblock: %d extra bytes\n",
1441 len - offset);
1442 return CL_EMALFDB;
1443 }
1444 bcfunc->numBytes = 0;
1445 bcfunc->insn_idx += BB->numInsts;
1446 return CL_SUCCESS;
1447 }
1448
/*
 * States of the line-by-line parser in cli_bytecode_load().  Each state
 * names the kind of line expected next.
 */
enum parse_state {
    PARSE_BC_TYPES      = 0, /* type table line */
    PARSE_BC_APIS       = 1, /* API call table line */
    PARSE_BC_GLOBALS    = 2, /* globals line */
    PARSE_BC_LSIG       = 3, /* logical signature line (first state entered) */
    PARSE_MD_OPT_HEADER = 4, /* optional 'D' metadata lines, else a function header */
    PARSE_FUNC_HEADER   = 5, /* function header line, or 'S' ending the parse */
    PARSE_BB            = 6, /* basic block line of the current function */
    PARSE_SKIP          = 7  /* bytecode is skipped; lines are read but not parsed */
};
1459
/* One row of the bytecode performance report. */
struct sigperf_elem {
    const char *bc_name;       /* name shown in the report */
    uint64_t usecs;            /* accumulated time value of the event */
    unsigned long run_count;   /* how many times the bytecode ran */
    unsigned long match_count; /* how many of those runs matched */
};

/*
 * qsort() comparator: orders sigperf_elem entries by average time per run
 * (usecs / run_count), largest first.
 *
 * The averages are compared instead of subtracted, so a gap that does not
 * fit in an int cannot flip or zero the result.  An entry with a zero
 * run_count is treated as having an average of 0.
 */
static int sigelem_comp(const void *a, const void *b)
{
    const struct sigperf_elem *ela = a;
    const struct sigperf_elem *elb = b;
    const uint64_t avg_a           = ela->run_count ? ela->usecs / ela->run_count : 0;
    const uint64_t avg_b           = elb->run_count ? elb->usecs / elb->run_count : 0;

    /* positive when b is slower than a, so slower entries sort first */
    return (avg_b > avg_a) - (avg_b < avg_a);
}
1473
cli_sigperf_print()1474 void cli_sigperf_print()
1475 {
1476 struct sigperf_elem stats[MAX_TRACKED_BC], *elem = stats;
1477 int i, elems = 0, max_name_len = 0, name_len;
1478
1479 if (!g_sigid || !g_sigevents) {
1480 cli_warnmsg("cli_sigperf_print: statistics requested but no bytecodes were loaded!\n");
1481 return;
1482 }
1483
1484 memset(stats, 0, sizeof(stats));
1485 for (i = 0; i < MAX_TRACKED_BC; i++) {
1486 union ev_val val;
1487 uint32_t count;
1488 const char *name = cli_event_get_name(g_sigevents, i * BC_EVENTS_PER_SIG);
1489 cli_event_get(g_sigevents, i * BC_EVENTS_PER_SIG, &val, &count);
1490 if (!count) {
1491 if (name)
1492 cli_dbgmsg("No event triggered for %s\n", name);
1493 continue;
1494 }
1495 if (name)
1496 name_len = (int)strlen(name);
1497 else
1498 name_len = 0;
1499 if (name_len > max_name_len)
1500 max_name_len = name_len;
1501 elem->bc_name = name ? name : "\"noname\"";
1502 elem->usecs = val.v_int;
1503 elem->run_count = count;
1504 cli_event_get(g_sigevents, i * BC_EVENTS_PER_SIG + 1, &val, &count);
1505 elem->match_count = count;
1506 elem++;
1507 elems++;
1508 }
1509 if (max_name_len < (int)strlen("Bytecode name"))
1510 max_name_len = (int)strlen("Bytecode name");
1511
1512 cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
1513
1514 elem = stats;
1515 /* name runs matches microsecs avg */
1516 cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "Bytecode name",
1517 8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg");
1518 cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "=============",
1519 8, "=====", 8, "========", 12, "===========", 9, "=========");
1520 while (elem->run_count) {
1521 cli_infomsg(NULL, "%-*s %*lu %*lu %*" PRIu64 " %*.2f\n", max_name_len, elem->bc_name,
1522 8, elem->run_count, 8, elem->match_count,
1523 12, elem->usecs, 9, (double)elem->usecs / elem->run_count);
1524 elem++;
1525 }
1526 }
1527
sigperf_events_init(struct cli_bc * bc)1528 static void sigperf_events_init(struct cli_bc *bc)
1529 {
1530 int ret;
1531 char *bc_name;
1532
1533 if (!g_sigevents)
1534 g_sigevents = cli_events_new(MAX_BC_SIGEVENT_ID);
1535
1536 if (!g_sigevents) {
1537 cli_errmsg("No memory for events table\n");
1538 return;
1539 }
1540
1541 if (g_sigid > MAX_BC_SIGEVENT_ID - BC_EVENTS_PER_SIG - 1) {
1542 cli_errmsg("sigperf_events_init: events table full. Increase MAX_TRACKED_BC\n");
1543 return;
1544 }
1545
1546 if (!(bc_name = bc->lsig)) {
1547 if (!(bc_name = bc->hook_name)) {
1548 cli_dbgmsg("cli_event_define error for time event id %d\n", bc->sigtime_id);
1549 return;
1550 }
1551 }
1552
1553 cli_dbgmsg("sigperf_events_init(): adding sig ids starting %u for %s\n", g_sigid, bc_name);
1554
1555 /* register time event */
1556 bc->sigtime_id = g_sigid;
1557 ret = cli_event_define(g_sigevents, g_sigid++, bc_name, ev_time, multiple_sum);
1558 if (ret) {
1559 cli_errmsg("sigperf_events_init: cli_event_define() error for time event id %d\n", bc->sigtime_id);
1560 bc->sigtime_id = MAX_BC_SIGEVENT_ID + 1;
1561 return;
1562 }
1563
1564 /* register match count */
1565 bc->sigmatch_id = g_sigid;
1566 ret = cli_event_define(g_sigevents, g_sigid++, bc_name, ev_int, multiple_sum);
1567 if (ret) {
1568 cli_errmsg("sigperf_events_init: cli_event_define() error for matches event id %d\n", bc->sigmatch_id);
1569 bc->sigmatch_id = MAX_BC_SIGEVENT_ID + 1;
1570 return;
1571 }
1572 }
1573
cli_sigperf_events_destroy()1574 void cli_sigperf_events_destroy()
1575 {
1576 cli_events_free(g_sigevents);
1577 }
1578
cli_bytecode_load(struct cli_bc * bc,FILE * f,struct cli_dbio * dbio,int trust,int sigperf)1579 int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int trust, int sigperf)
1580 {
1581 unsigned row = 0, current_func = 0, bb = 0;
1582 char *buffer;
1583 unsigned linelength = 0;
1584 char firstbuf[FILEBUFF];
1585 enum parse_state state;
1586 int rc, end = 0;
1587
1588 memset(bc, 0, sizeof(*bc));
1589 cli_dbgmsg("Loading %s bytecode\n", trust ? "trusted" : "untrusted");
1590 bc->trusted = trust;
1591 if (!f && !dbio) {
1592 cli_errmsg("Unable to load bytecode (null file)\n");
1593 return CL_ENULLARG;
1594 }
1595 if (!cli_dbgets(firstbuf, FILEBUFF, f, dbio)) {
1596 cli_errmsg("Unable to load bytecode (empty file)\n");
1597 return CL_EMALFDB;
1598 }
1599 cli_chomp(firstbuf);
1600 rc = parseHeader(bc, (unsigned char *)firstbuf, &linelength);
1601 state = PARSE_BC_LSIG;
1602 if (rc == CL_BREAK) {
1603 const char *len = strchr(firstbuf, ':');
1604 bc->state = bc_skip;
1605 if (!linelength) {
1606 linelength = len ? atoi(len + 1) : 4096;
1607 }
1608 if (linelength < 4096)
1609 linelength = 4096;
1610 cli_dbgmsg("line: %d\n", linelength);
1611 state = PARSE_SKIP;
1612 rc = CL_SUCCESS;
1613 }
1614 if (rc != CL_SUCCESS) {
1615 cli_errmsg("Error at bytecode line %u\n", row);
1616 return rc;
1617 }
1618 buffer = cli_malloc(linelength);
1619 if (!buffer) {
1620 cli_errmsg("Out of memory allocating line of length %u\n", linelength);
1621 return CL_EMEM;
1622 }
1623 while (cli_dbgets(buffer, linelength, f, dbio) && !end) {
1624 cli_chomp(buffer);
1625 row++;
1626 switch (state) {
1627 case PARSE_BC_LSIG:
1628 rc = parseLSig(bc, buffer);
1629 #if 0
1630 DEAD CODE
1631 if (rc == CL_BREAK) /* skip */ { //FIXME: parseLSig always returns CL_SUCCESS
1632 bc->state = bc_skip;
1633 state = PARSE_SKIP;
1634 continue;
1635 }
1636 if (rc != CL_SUCCESS) { //FIXME: parseLSig always returns CL_SUCCESS
1637 cli_errmsg("Error at bytecode line %u\n", row);
1638 free(buffer);
1639 return rc;
1640 }
1641 #endif
1642 state = PARSE_BC_TYPES;
1643 break;
1644 case PARSE_BC_TYPES:
1645 rc = parseTypes(bc, (unsigned char *)buffer);
1646 if (rc != CL_SUCCESS) {
1647 cli_errmsg("Error at bytecode line %u\n", row);
1648 free(buffer);
1649 return rc;
1650 }
1651 state = PARSE_BC_APIS;
1652 break;
1653 case PARSE_BC_APIS:
1654 rc = parseApis(bc, (unsigned char *)buffer);
1655 if (rc == CL_BREAK) /* skip */ {
1656 bc->state = bc_skip;
1657 state = PARSE_SKIP;
1658 continue;
1659 }
1660 if (rc != CL_SUCCESS) {
1661 cli_errmsg("Error at bytecode line %u\n", row);
1662 free(buffer);
1663 return rc;
1664 }
1665 state = PARSE_BC_GLOBALS;
1666 break;
1667 case PARSE_BC_GLOBALS:
1668 rc = parseGlobals(bc, (unsigned char *)buffer);
1669 if (rc == CL_BREAK) /* skip */ {
1670 bc->state = bc_skip;
1671 state = PARSE_SKIP;
1672 continue;
1673 }
1674 if (rc != CL_SUCCESS) {
1675 cli_errmsg("Error at bytecode line %u\n", row);
1676 free(buffer);
1677 return rc;
1678 }
1679 state = PARSE_MD_OPT_HEADER;
1680 break;
1681 case PARSE_MD_OPT_HEADER:
1682 if (buffer[0] == 'D') {
1683 rc = parseMD(bc, (unsigned char *)buffer);
1684 if (rc != CL_SUCCESS) {
1685 cli_errmsg("Error at bytecode line %u\n", row);
1686 free(buffer);
1687 return rc;
1688 }
1689 break;
1690 }
1691 /* fall-through */
1692 case PARSE_FUNC_HEADER:
1693 if (*buffer == 'S') {
1694 end = 1;
1695 break;
1696 }
1697 rc = parseFunctionHeader(bc, current_func, (unsigned char *)buffer);
1698 if (rc != CL_SUCCESS) {
1699 cli_errmsg("Error at bytecode line %u\n", row);
1700 free(buffer);
1701 return rc;
1702 }
1703 bb = 0;
1704 state = PARSE_BB;
1705 break;
1706 case PARSE_BB:
1707 rc = parseBB(bc, current_func, bb++, (unsigned char *)buffer);
1708 if (rc != CL_SUCCESS) {
1709 cli_errmsg("Error at bytecode line %u\n", row);
1710 free(buffer);
1711 return rc;
1712 }
1713 if (bb >= bc->funcs[current_func].numBB) {
1714 if (bc->funcs[current_func].insn_idx != bc->funcs[current_func].numInsts) {
1715 cli_errmsg("Parsed different number of instructions than declared: %u != %u\n",
1716 bc->funcs[current_func].insn_idx, bc->funcs[current_func].numInsts);
1717 free(buffer);
1718 return CL_EMALFDB;
1719 }
1720 cli_dbgmsg("Parsed %u BBs, %u instructions\n",
1721 bb, bc->funcs[current_func].numInsts);
1722 state = PARSE_FUNC_HEADER;
1723 current_func++;
1724 }
1725 break;
1726 case PARSE_SKIP:
1727 /* stop at S (source code), readdb.c knows how to skip this one
1728 * */
1729 if (buffer[0] == 'S')
1730 end = 1;
1731 /* noop parse, but we need to use dbgets with dynamic buffer,
1732 * otherwise we get 'Line too long for provided buffer' */
1733 break;
1734 }
1735 }
1736 free(buffer);
1737 cli_dbgmsg("Parsed %d functions\n", current_func);
1738 if (sigperf)
1739 sigperf_events_init(bc);
1740 if (current_func != bc->num_func && bc->state != bc_skip) {
1741 cli_errmsg("Loaded less functions than declared: %u vs. %u\n",
1742 current_func, bc->num_func);
1743 return CL_EMALFDB;
1744 }
1745 return CL_SUCCESS;
1746 }
1747
1748 static struct {
1749 enum bc_events id;
1750 const char *name;
1751 enum ev_type type;
1752 enum multiple_handling multiple;
1753 } bc_events[] = {
1754 {BCEV_VIRUSNAME, "virusname", ev_string, multiple_last},
1755 {BCEV_EXEC_RETURNVALUE, "returnvalue", ev_int, multiple_last},
1756 {BCEV_WRITE, "bcapi_write", ev_data_fast, multiple_sum},
1757 {BCEV_OFFSET, "read offset", ev_int, multiple_sum},
1758 {BCEV_READ, "read data", ev_data_fast, multiple_sum},
1759 //{BCEV_READ, "read data", ev_data, multiple_concat},
1760 {BCEV_DBG_STR, "debug message", ev_data_fast, multiple_sum},
1761 {BCEV_DBG_INT, "debug int", ev_int, multiple_sum},
1762 {BCEV_MEM_1, "memmem 1", ev_data_fast, multiple_sum},
1763 {BCEV_MEM_2, "memmem 2", ev_data_fast, multiple_sum},
1764 {BCEV_FIND, "find", ev_data_fast, multiple_sum},
1765 {BCEV_EXTRACTED, "extracted files", ev_int, multiple_sum},
1766 {BCEV_READ_ERR, "read errors", ev_int, multiple_sum},
1767 {BCEV_DISASM_FAIL, "disasm fails", ev_int, multiple_sum},
1768 {BCEV_EXEC_TIME, "bytecode execute", ev_time, multiple_sum}};
1769
/*
 * Define every entry of the bc_events table on the event table `ev`.
 * Returns 0 on success, or -1 as soon as one cli_event_define() call
 * reports -1.
 */
static int register_events(cli_events_t *ev)
{
    const size_t total = sizeof(bc_events) / sizeof(bc_events[0]);
    size_t idx         = 0;

    while (idx < total) {
        if (cli_event_define(ev, bc_events[idx].id, bc_events[idx].name, bc_events[idx].type,
                             bc_events[idx].multiple) == -1)
            return -1;
        idx++;
    }
    return 0;
}
1780
cli_bytecode_run(const struct cli_all_bc * bcs,const struct cli_bc * bc,struct cli_bc_ctx * ctx)1781 int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, struct cli_bc_ctx *ctx)
1782 {
1783 int ret = CL_SUCCESS;
1784 struct cli_bc_inst inst;
1785 struct cli_bc_func func;
1786 cli_events_t *jit_ev = NULL, *interp_ev = NULL;
1787
1788 int test_mode = 0;
1789 cli_ctx *cctx = (cli_ctx *)ctx->ctx;
1790
1791 if (!ctx || !ctx->bc || !ctx->func)
1792 return CL_ENULLARG;
1793 if (ctx->numParams && (!ctx->values || !ctx->operands))
1794 return CL_ENULLARG;
1795
1796 if (cctx && cctx->engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
1797 test_mode = 1;
1798
1799 if (bc->state == bc_loaded) {
1800 cli_errmsg("bytecode has to be prepared either for interpreter or JIT!\n");
1801 return CL_EARG;
1802 }
1803 if (bc->state == bc_disabled) {
1804 cli_dbgmsg("bytecode triggered but running bytecodes is disabled\n");
1805 return CL_SUCCESS;
1806 }
1807 if (cctx)
1808 cli_event_time_start(cctx->perf, PERFT_BYTECODE);
1809 ctx->env = &bcs->env;
1810 context_safe(ctx);
1811 if (test_mode) {
1812 jit_ev = cli_events_new(BCEV_LASTEVENT);
1813 interp_ev = cli_events_new(BCEV_LASTEVENT);
1814 if (!jit_ev || !interp_ev) {
1815 cli_events_free(jit_ev);
1816 cli_events_free(interp_ev);
1817 return CL_EMEM;
1818 }
1819 if (register_events(jit_ev) == -1 ||
1820 register_events(interp_ev) == -1) {
1821 cli_events_free(jit_ev);
1822 cli_events_free(interp_ev);
1823 return CL_EBYTECODE_TESTFAIL;
1824 }
1825 }
1826 cli_event_time_start(g_sigevents, bc->sigtime_id);
1827 if (bc->state == bc_interp || test_mode) {
1828 ctx->bc_events = interp_ev;
1829 memset(&func, 0, sizeof(func));
1830 func.numInsts = 1;
1831 func.numValues = 1;
1832 func.numConstants = 0;
1833 func.numBytes = ctx->bytes;
1834 memset(ctx->values + ctx->bytes - 8, 0, 8);
1835
1836 inst.opcode = OP_BC_CALL_DIRECT;
1837 inst.interp_op = OP_BC_CALL_DIRECT * 5;
1838 inst.dest = func.numArgs;
1839 inst.type = 0;
1840 inst.u.ops.numOps = ctx->numParams;
1841 inst.u.ops.funcid = ctx->funcid;
1842 inst.u.ops.ops = ctx->operands;
1843 inst.u.ops.opsizes = ctx->opsizes;
1844 cli_dbgmsg("Bytecode %u: executing in interpreter mode\n", bc->id);
1845
1846 ctx->on_jit = 0;
1847
1848 cli_event_time_start(interp_ev, BCEV_EXEC_TIME);
1849 ret = cli_vm_execute(ctx->bc, ctx, &func, &inst);
1850 cli_event_time_stop(interp_ev, BCEV_EXEC_TIME);
1851
1852 cli_event_int(interp_ev, BCEV_EXEC_RETURNVALUE, ret);
1853 cli_event_string(interp_ev, BCEV_VIRUSNAME, ctx->virname);
1854
1855 /* need to be called here to catch any extracted but not yet scanned files */
1856 if (ctx->outfd && (ret != CL_VIRUS || cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES))
1857 cli_bcapi_extract_new(ctx, -1);
1858 }
1859 if (bc->state == bc_jit || test_mode) {
1860 if (test_mode) {
1861 ctx->off = 0;
1862 }
1863 ctx->bc_events = jit_ev;
1864 cli_dbgmsg("Bytecode %u: executing in JIT mode\n", bc->id);
1865
1866 ctx->on_jit = 1;
1867 cli_event_time_start(jit_ev, BCEV_EXEC_TIME);
1868 ret = cli_vm_execute_jit(bcs, ctx, &bc->funcs[ctx->funcid]);
1869 cli_event_time_stop(jit_ev, BCEV_EXEC_TIME);
1870
1871 cli_event_int(jit_ev, BCEV_EXEC_RETURNVALUE, ret);
1872 cli_event_string(jit_ev, BCEV_VIRUSNAME, ctx->virname);
1873
1874 /* need to be called here to catch any extracted but not yet scanned files */
1875 if (ctx->outfd && (ret != CL_VIRUS || cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES))
1876 cli_bcapi_extract_new(ctx, -1);
1877 }
1878 cli_event_time_stop(g_sigevents, bc->sigtime_id);
1879 if (ctx->virname)
1880 cli_event_count(g_sigevents, bc->sigmatch_id);
1881
1882 if (test_mode) {
1883 unsigned interp_errors = cli_event_errors(interp_ev);
1884 unsigned jit_errors = cli_event_errors(jit_ev);
1885 unsigned interp_warns = 0, jit_warns = 0;
1886 int ok = 1;
1887 enum bc_events evid;
1888
1889 if (interp_errors || jit_errors) {
1890 cli_infomsg(cctx, "bytecode %d encountered %u JIT and %u interpreter errors\n",
1891 bc->id, interp_errors, jit_errors);
1892 ok = 0;
1893 }
1894 if (!ctx->no_diff && cli_event_diff_all(interp_ev, jit_ev, NULL)) {
1895 cli_infomsg(cctx, "bytecode %d execution different with JIT and interpreter, see --debug for details\n",
1896 bc->id);
1897 ok = 0;
1898 }
1899 for (evid = BCEV_API_WARN_BEGIN + 1; evid < BCEV_API_WARN_END; evid++) {
1900 union ev_val v;
1901 uint32_t count = 0;
1902 cli_event_get(interp_ev, evid, &v, &count);
1903 interp_warns += count;
1904 count = 0;
1905 cli_event_get(jit_ev, evid, &v, &count);
1906 jit_warns += count;
1907 }
1908 if (interp_warns || jit_warns) {
1909 cli_infomsg(cctx, "bytecode %d encountered %u JIT and %u interpreter warnings\n",
1910 bc->id, interp_warns, jit_warns);
1911 ok = 0;
1912 }
1913 /*cli_event_debug(jit_ev, BCEV_EXEC_TIME);
1914 cli_event_debug(interp_ev, BCEV_EXEC_TIME);
1915 cli_event_debug(g_sigevents, bc->sigtime_id);*/
1916 if (!ok) {
1917 cli_events_free(jit_ev);
1918 cli_events_free(interp_ev);
1919 return CL_EBYTECODE_TESTFAIL;
1920 }
1921 }
1922 cli_events_free(jit_ev);
1923 cli_events_free(interp_ev);
1924 if (cctx)
1925 cli_event_time_stop(cctx->perf, PERFT_BYTECODE);
1926 return ret;
1927 }
1928
cli_bytecode_context_getresult_int(struct cli_bc_ctx * ctx)1929 uint64_t cli_bytecode_context_getresult_int(struct cli_bc_ctx *ctx)
1930 {
1931 return *(uint32_t *)ctx->values; /*XXX*/
1932 }
1933
cli_bytecode_destroy(struct cli_bc * bc)1934 void cli_bytecode_destroy(struct cli_bc *bc)
1935 {
1936 unsigned i, j, k;
1937 free(bc->metadata.compiler);
1938 free(bc->metadata.sigmaker);
1939
1940 if (bc->funcs) {
1941 for (i = 0; i < bc->num_func; i++) {
1942 struct cli_bc_func *f = &bc->funcs[i];
1943 if (!f)
1944 continue;
1945 free(f->types);
1946
1947 for (j = 0; j < f->numBB; j++) {
1948 struct cli_bc_bb *BB = &f->BB[j];
1949 for (k = 0; k < BB->numInsts; k++) {
1950 struct cli_bc_inst *ii = &BB->insts[k];
1951 if (operand_counts[ii->opcode] > 3 ||
1952 ii->opcode == OP_BC_CALL_DIRECT || ii->opcode == OP_BC_CALL_API) {
1953 free(ii->u.ops.ops);
1954 free(ii->u.ops.opsizes);
1955 }
1956 }
1957 }
1958 free(f->BB);
1959 free(f->allinsts);
1960 free(f->constants);
1961 }
1962 free(bc->funcs);
1963 }
1964 if (bc->types) {
1965 for (i = NUM_STATIC_TYPES; i < bc->num_types; i++) {
1966 if (bc->types[i].containedTypes)
1967 free(bc->types[i].containedTypes);
1968 }
1969 free(bc->types);
1970 }
1971
1972 if (bc->globals) {
1973 for (i = 0; i < bc->num_globals; i++) {
1974 free(bc->globals[i]);
1975 }
1976 free(bc->globals);
1977 }
1978 if (bc->dbgnodes) {
1979 for (i = 0; i < bc->dbgnode_cnt; i++) {
1980 for (j = 0; j < bc->dbgnodes[i].numelements; j++) {
1981 struct cli_bc_dbgnode_element *el = &bc->dbgnodes[i].elements[j];
1982 if (el && el->string)
1983 free(el->string);
1984 }
1985 }
1986 free(bc->dbgnodes);
1987 }
1988 free(bc->globaltys);
1989 if (bc->uses_apis)
1990 cli_bitset_free(bc->uses_apis);
1991 free(bc->lsig);
1992 free(bc->hook_name);
1993 free(bc->globalBytes);
1994 memset(bc, 0, sizeof(*bc));
1995 }
1996
/*
 * MAP(val): rewrite operand `val` in place from a value index to the offset
 * the interpreter uses.
 *
 *  - If bit 31 is set the operand refers to a global: the low 31 bits index
 *    gmap[] and the result keeps bit 31 set.
 *  - Otherwise the operand indexes map[].
 *
 * The macro relies on these names being in scope where it is expanded:
 * bc, map, gmap, totValues, and the loop counters i (function) and
 * j (instruction). On an out-of-range operand it frees map and gmap and
 * RETURNS CL_EBYTECODE from the enclosing function.
 *
 * gmap[] holds bc->num_globals entries, so a global index equal to
 * num_globals is already out of range (hence >=).
 */
#define MAP(val)                                                                                                            \
    do {                                                                                                                    \
        operand_t o = val;                                                                                                  \
        if (o & 0x80000000) {                                                                                               \
            o &= 0x7fffffff;                                                                                                \
            if (o >= bc->num_globals) {                                                                                     \
                cli_errmsg("bytecode: global out of range: %u > %u, for instruction %u in function %u\n",                   \
                           o, (unsigned)bc->num_globals, j, i);                                                             \
                free(map);                                                                                                  \
                free(gmap);                                                                                                 \
                return CL_EBYTECODE;                                                                                        \
            }                                                                                                               \
            val = 0x80000000 | gmap[o];                                                                                     \
            break;                                                                                                          \
        }                                                                                                                   \
        if (o >= totValues) {                                                                                               \
            cli_errmsg("bytecode: operand out of range: %u > %u, for instruction %u in function %u\n", o, totValues, j, i); \
            free(map);                                                                                                      \
            free(gmap);                                                                                                     \
            return CL_EBYTECODE;                                                                                            \
        }                                                                                                                   \
        val = map[o];                                                                                                       \
    } while (0)
2020
/*
 * MAPPTR(val): like MAP(val), but for operands used as pointers.
 *
 * When val is one of the function's own values (val < bcfunc->numValues)
 * and its type entry has bit 0x8000 set, the operand becomes
 * map[val] | 0x40000000; every other operand is handled by MAP().
 * NOTE(review): the GEP1 error message in this file suggests 0x8000 marks
 * an alloca - confirm against the loader.
 *
 * Needs bcfunc and map in scope, plus everything MAP() needs. Wrapped in
 * do { } while (0) so that `MAPPTR(x);` is a single statement and stays
 * safe inside an unbraced if/else.
 */
#define MAPPTR(val)                                                             \
    do {                                                                        \
        if (((val) < bcfunc->numValues) && (bcfunc->types[(val)] & 0x8000))     \
            (val) = map[(val)] | 0x40000000;                                    \
        else                                                                    \
            MAP(val);                                                           \
    } while (0)
2028
/*
 * Pack an object id and a byte offset into one 64-bit pointer value:
 * the id occupies the upper 32 bits, the offset the lower 32 bits.
 */
static inline int64_t ptr_compose(int32_t id, uint32_t offset)
{
    /* Widen first (sign-extending id), then shift into the high half. */
    const uint64_t high = (uint64_t)id << 32;

    return high | offset;
}
2034
get_geptypesize(const struct cli_bc * bc,uint16_t tid)2035 static inline int get_geptypesize(const struct cli_bc *bc, uint16_t tid)
2036 {
2037 const struct cli_bc_type *ty;
2038 if (tid >= bc->num_types + 65) {
2039 cli_errmsg("bytecode: typeid out of range %u >= %u\n", tid, bc->num_types);
2040 return -1;
2041 }
2042 if (tid <= 64) {
2043 cli_errmsg("bytecode: invalid type for gep (%u)\n", tid);
2044 return -1;
2045 }
2046 ty = &bc->types[tid - 65];
2047 if (ty->kind != DPointerType) {
2048 cli_errmsg("bytecode: invalid gep type, must be pointer: %u\n", tid);
2049 return -1;
2050 }
2051 return typesize(bc, ty->containedTypes[0]);
2052 }
2053
/*
 * Resolve the field index of a GEPZ instruction into a byte offset.
 *
 * `tid` must name a pointer to a non-integer type (id >= 65). When the
 * pointee is a struct or packed struct, operand `op` must be one of the
 * function's constants holding the field index; that constant is
 * overwritten in place with the sum of the sizes of all preceding fields.
 *
 * Returns  1 when the constant was rewritten,
 *          0 when the pointee is not a struct (nothing to do),
 *         -1 on malformed input (an error is logged).
 */
static int calc_gepz(struct cli_bc *bc, struct cli_bc_func *func, uint16_t tid, operand_t op)
{
    unsigned off = 0, i;
    uint32_t *gepoff;
    const struct cli_bc_type *ty;

    if (tid >= bc->num_types + 65) {
        cli_errmsg("bytecode: typeid out of range %u >= %u\n", tid, bc->num_types);
        return -1;
    }
    if (tid <= 65) {
        cli_errmsg("bytecode: invalid type for gep (%u)\n", tid);
        return -1;
    }
    ty = &bc->types[tid - 65];
    if (ty->kind != DPointerType || ty->containedTypes[0] < 65) {
        cli_errmsg("bytecode: invalid gep type, must be pointer to nonint: %u\n", tid);
        return -1;
    }
    ty = &bc->types[ty->containedTypes[0] - 65];
    if (ty->kind != DStructType && ty->kind != DPackedStructType)
        return 0;

    /* Constants are numbered right after the function's values; anything
     * outside [numValues, numValues + numConstants) would index outside
     * func->constants. */
    if (op < func->numValues || op - func->numValues >= func->numConstants) {
        cli_errmsg("bytecode: gep offset operand is not a constant: %u\n", (unsigned)op);
        return -1;
    }

    /* Only the first 32 bits of the constant slot are read and rewritten. */
    gepoff = (uint32_t *)&func->constants[op - func->numValues];
    if (*gepoff >= ty->numElements) {
        cli_errmsg("bytecode: gep offset out of range: %d >= %d\n", (uint32_t)*gepoff, ty->numElements);
        return -1;
    }
    for (i = 0; i < *gepoff; i++) {
        off += typesize(bc, ty->containedTypes[i]);
    }
    *gepoff = off;
    return 1;
}
2086
cli_bytecode_prepare_interpreter(struct cli_bc * bc)2087 static int cli_bytecode_prepare_interpreter(struct cli_bc *bc)
2088 {
2089 unsigned i, j, k;
2090 uint64_t *gmap;
2091 unsigned bcglobalid = cli_apicall_maxglobal - _FIRST_GLOBAL + 2;
2092 int ret = CL_SUCCESS;
2093 bc->numGlobalBytes = 0;
2094 gmap = cli_malloc(bc->num_globals * sizeof(*gmap));
2095 if (!gmap) {
2096 cli_errmsg("interpreter: Unable to allocate memory for global map: %zu\n", bc->num_globals * sizeof(*gmap));
2097 return CL_EMEM;
2098 }
2099 for (j = 0; j < bc->num_globals; j++) {
2100 uint16_t ty = bc->globaltys[j];
2101 unsigned align = typealign(bc, ty);
2102 assert(align);
2103 bc->numGlobalBytes = (bc->numGlobalBytes + align - 1) & (~(align - 1));
2104 gmap[j] = bc->numGlobalBytes;
2105 bc->numGlobalBytes += typesize(bc, ty);
2106 }
2107 if (bc->numGlobalBytes) {
2108 bc->globalBytes = cli_calloc(1, bc->numGlobalBytes);
2109 if (!bc->globalBytes) {
2110 cli_errmsg("interpreter: Unable to allocate memory for globalBytes: %u\n", bc->numGlobalBytes);
2111 free(gmap);
2112 return CL_EMEM;
2113 }
2114 } else
2115 bc->globalBytes = NULL;
2116
2117 for (j = 0; j < bc->num_globals; j++) {
2118 struct cli_bc_type *ty;
2119 if (bc->globaltys[j] < 65)
2120 continue;
2121 ty = &bc->types[bc->globaltys[j] - 65];
2122 switch (ty->kind) {
2123 case DPointerType: {
2124 uint64_t ptr;
2125 if (bc->globals[j][1] >= _FIRST_GLOBAL) {
2126 ptr = ptr_compose(bc->globals[j][1] - _FIRST_GLOBAL + 1,
2127 bc->globals[j][0]);
2128 } else {
2129 if (bc->globals[j][1] > bc->num_globals)
2130 continue;
2131 ptr = ptr_compose(bcglobalid,
2132 gmap[bc->globals[j][1]] + bc->globals[j][0]);
2133 }
2134 *(uint64_t *)&bc->globalBytes[gmap[j]] = ptr;
2135 break;
2136 }
2137 case DArrayType: {
2138 unsigned elsize, i, off = gmap[j];
2139 /* TODO: support other than ints in arrays */
2140 elsize = typesize(bc, ty->containedTypes[0]);
2141 switch (elsize) {
2142 case 1:
2143 for (i = 0; i < ty->numElements; i++)
2144 bc->globalBytes[off + i] = bc->globals[j][i];
2145 break;
2146 case 2:
2147 for (i = 0; i < ty->numElements; i++)
2148 *(uint16_t *)&bc->globalBytes[off + i * 2] = bc->globals[j][i];
2149 break;
2150 case 4:
2151 for (i = 0; i < ty->numElements; i++)
2152 *(uint32_t *)&bc->globalBytes[off + i * 4] = bc->globals[j][i];
2153 break;
2154 case 8:
2155 for (i = 0; i < ty->numElements; i++)
2156 *(uint64_t *)&bc->globalBytes[off + i * 8] = bc->globals[j][i];
2157 break;
2158 default:
2159 cli_dbgmsg("interpreter: unsupported elsize: %u\n", elsize);
2160 }
2161 break;
2162 }
2163 default:
2164 /*TODO*/
2165 if (!bc->globals[j][1])
2166 continue; /* null */
2167 break;
2168 }
2169 }
2170
2171 for (i = 0; i < bc->num_func && ret == CL_SUCCESS; i++) {
2172 struct cli_bc_func *bcfunc = &bc->funcs[i];
2173 unsigned totValues = bcfunc->numValues + bcfunc->numConstants + bc->num_globals;
2174 unsigned *map = cli_malloc(sizeof(*map) * (size_t)totValues);
2175 if (!map) {
2176 cli_errmsg("interpreter: Unable to allocate memory for map: %zu\n", sizeof(*map) * (size_t)totValues);
2177 free(gmap);
2178 return CL_EMEM;
2179 }
2180 bcfunc->numBytes = 0;
2181 for (j = 0; j < bcfunc->numValues; j++) {
2182 uint16_t ty = bcfunc->types[j];
2183 unsigned align;
2184 align = typealign(bc, ty);
2185 assert(!ty || typesize(bc, ty));
2186 assert(align);
2187 bcfunc->numBytes = (bcfunc->numBytes + align - 1) & (~(align - 1));
2188 map[j] = bcfunc->numBytes;
2189 /* printf("%d -> %d, %u\n", j, map[j], typesize(bc, ty)); */
2190 bcfunc->numBytes += typesize(bc, ty);
2191 /* TODO: don't allow size 0, it is always a bug! */
2192 }
2193 bcfunc->numBytes = (bcfunc->numBytes + 7) & ~7;
2194 for (j = 0; j < bcfunc->numConstants; j++) {
2195 map[bcfunc->numValues + j] = bcfunc->numBytes;
2196 bcfunc->numBytes += 8;
2197 }
2198 for (j = 0; j < bcfunc->numInsts && ret == CL_SUCCESS; j++) {
2199 struct cli_bc_inst *inst = &bcfunc->allinsts[j];
2200 inst->dest = map[inst->dest];
2201 switch (inst->opcode) {
2202 case OP_BC_ADD:
2203 case OP_BC_SUB:
2204 case OP_BC_MUL:
2205 case OP_BC_UDIV:
2206 case OP_BC_SDIV:
2207 case OP_BC_UREM:
2208 case OP_BC_SREM:
2209 case OP_BC_SHL:
2210 case OP_BC_LSHR:
2211 case OP_BC_ASHR:
2212 case OP_BC_AND:
2213 case OP_BC_OR:
2214 case OP_BC_XOR:
2215 case OP_BC_ICMP_EQ:
2216 case OP_BC_ICMP_NE:
2217 case OP_BC_ICMP_UGT:
2218 case OP_BC_ICMP_UGE:
2219 case OP_BC_ICMP_ULT:
2220 case OP_BC_ICMP_ULE:
2221 case OP_BC_ICMP_SGT:
2222 case OP_BC_ICMP_SGE:
2223 case OP_BC_ICMP_SLT:
2224 case OP_BC_ICMP_SLE:
2225 case OP_BC_COPY:
2226 case OP_BC_STORE:
2227 MAP(inst->u.binop[0]);
2228 MAP(inst->u.binop[1]);
2229 break;
2230 case OP_BC_SEXT:
2231 case OP_BC_ZEXT:
2232 case OP_BC_TRUNC:
2233 MAP(inst->u.cast.source);
2234 break;
2235 case OP_BC_BRANCH:
2236 MAP(inst->u.branch.condition);
2237 break;
2238 case OP_BC_JMP:
2239 break;
2240 case OP_BC_RET:
2241 MAP(inst->u.unaryop);
2242 break;
2243 case OP_BC_SELECT:
2244 MAP(inst->u.three[0]);
2245 MAP(inst->u.three[1]);
2246 MAP(inst->u.three[2]);
2247 break;
2248 case OP_BC_CALL_API: /* fall-through */
2249 case OP_BC_CALL_DIRECT: {
2250 struct cli_bc_func *target = NULL;
2251 if (inst->opcode == OP_BC_CALL_DIRECT) {
2252 target = &bc->funcs[inst->u.ops.funcid];
2253 if (inst->u.ops.funcid > bc->num_func) {
2254 cli_errmsg("bytecode: called function out of range: %u > %u\n", inst->u.ops.funcid, bc->num_func);
2255 ret = CL_EBYTECODE;
2256 } else if (inst->u.ops.numOps != target->numArgs) {
2257 cli_errmsg("bytecode: call operands don't match function prototype\n");
2258 ret = CL_EBYTECODE;
2259 }
2260 } else {
2261 /* APIs have at most 2 parameters always */
2262 if (inst->u.ops.numOps > 5) {
2263 cli_errmsg("bytecode: call operands don't match function prototype\n");
2264 ret = CL_EBYTECODE;
2265 }
2266 }
2267 if (ret != CL_SUCCESS)
2268 break;
2269 if (inst->u.ops.numOps > 0) {
2270 inst->u.ops.opsizes = cli_malloc(sizeof(*inst->u.ops.opsizes) * inst->u.ops.numOps);
2271 if (!inst->u.ops.opsizes) {
2272 cli_errmsg("Out of memory when allocating operand sizes\n");
2273 ret = CL_EMEM;
2274 break;
2275 }
2276 } else {
2277 inst->u.ops.opsizes = NULL;
2278 break;
2279 }
2280 for (k = 0; k < inst->u.ops.numOps; k++) {
2281 MAPPTR(inst->u.ops.ops[k]);
2282 if (inst->opcode == OP_BC_CALL_DIRECT)
2283 inst->u.ops.opsizes[k] = typesize(bc, target->types[k]);
2284 else
2285 inst->u.ops.opsizes[k] = 32; /*XXX*/
2286 }
2287 break;
2288 }
2289 case OP_BC_LOAD:
2290 MAPPTR(inst->u.unaryop);
2291 break;
2292 case OP_BC_GEP1:
2293 if (inst->u.three[1] & 0x80000000 ||
2294 bcfunc->types[inst->u.binop[1]] & 0x8000) {
2295 cli_errmsg("bytecode: gep1 of alloca is not allowed\n");
2296 ret = CL_EBYTECODE;
2297 }
2298 if (ret != CL_SUCCESS)
2299 break;
2300 MAP(inst->u.three[1]);
2301 MAP(inst->u.three[2]);
2302 inst->u.three[0] = get_geptypesize(bc, inst->u.three[0]);
2303 if ((int)(inst->u.three[0]) == -1)
2304 ret = CL_EBYTECODE;
2305 break;
2306 case OP_BC_GEPZ:
2307 /*three[0] is the type*/
2308 if (inst->u.three[1] & 0x80000000 ||
2309 bcfunc->types[inst->u.three[1]] & 0x8000)
2310 inst->interp_op = 5 * (inst->interp_op / 5);
2311 else
2312 inst->interp_op = 5 * (inst->interp_op / 5) + 3;
2313 MAP(inst->u.three[1]);
2314 if (calc_gepz(bc, bcfunc, inst->u.three[0], inst->u.three[2]) == -1)
2315 ret = CL_EBYTECODE;
2316 if (ret == CL_SUCCESS)
2317 MAP(inst->u.three[2]);
2318 break;
2319 /* case OP_BC_GEPN:
2320 *TODO
2321 break;*/
2322 case OP_BC_MEMSET:
2323 case OP_BC_MEMCPY:
2324 case OP_BC_MEMMOVE:
2325 case OP_BC_MEMCMP:
2326 MAPPTR(inst->u.three[0]);
2327 MAPPTR(inst->u.three[1]);
2328 MAP(inst->u.three[2]);
2329 break;
2330 case OP_BC_RET_VOID:
2331 case OP_BC_ISBIGENDIAN:
2332 case OP_BC_ABORT:
2333 /* no operands */
2334 break;
2335 case OP_BC_BSWAP16:
2336 case OP_BC_BSWAP32:
2337 case OP_BC_BSWAP64:
2338 MAP(inst->u.unaryop);
2339 break;
2340 case OP_BC_PTRDIFF32:
2341 MAPPTR(inst->u.binop[0]);
2342 MAPPTR(inst->u.binop[1]);
2343 break;
2344 case OP_BC_PTRTOINT64:
2345 MAPPTR(inst->u.unaryop);
2346 break;
2347 default:
2348 cli_warnmsg("Bytecode: unhandled opcode: %d\n", inst->opcode);
2349 ret = CL_EBYTECODE;
2350 }
2351 }
2352 if (map)
2353 free(map);
2354 }
2355 free(gmap);
2356 bc->state = bc_interp;
2357 return ret;
2358 }
2359
add_selfcheck(struct cli_all_bc * bcs)2360 static int add_selfcheck(struct cli_all_bc *bcs)
2361 {
2362 struct cli_bc_func *func;
2363 struct cli_bc_inst *inst;
2364 struct cli_bc *bc;
2365
2366 bcs->all_bcs = cli_realloc2(bcs->all_bcs, sizeof(*bcs->all_bcs) * (bcs->count + 1));
2367 if (!bcs->all_bcs) {
2368 cli_errmsg("cli_loadcbc: Can't allocate memory for bytecode entry\n");
2369 return CL_EMEM;
2370 }
2371 bc = &bcs->all_bcs[bcs->count++];
2372 memset(bc, 0, sizeof(*bc));
2373
2374 bc->trusted = 1;
2375 bc->num_globals = 1;
2376 bc->globals = cli_calloc(1, sizeof(*bc->globals));
2377 if (!bc->globals) {
2378 cli_errmsg("Failed to allocate memory for globals\n");
2379 return CL_EMEM;
2380 }
2381 bc->globals[0] = cli_calloc(1, sizeof(*bc->globals[0]));
2382 if (!bc->globals[0]) {
2383 cli_errmsg("Failed to allocate memory for globals\n");
2384 return CL_EMEM;
2385 }
2386 bc->globaltys = cli_calloc(1, sizeof(*bc->globaltys));
2387 if (!bc->globaltys) {
2388 cli_errmsg("Failed to allocate memory for globaltypes\n");
2389 return CL_EMEM;
2390 }
2391 bc->globaltys[0] = 32;
2392 *bc->globals[0] = 0;
2393 bc->id = ~0;
2394 bc->kind = 0;
2395 bc->num_types = 5;
2396 bc->num_func = 1;
2397 bc->funcs = cli_calloc(1, sizeof(*bc->funcs));
2398 if (!bc->funcs) {
2399 cli_errmsg("Failed to allocate memory for func\n");
2400 return CL_EMEM;
2401 }
2402 func = bc->funcs;
2403 func->numInsts = 2;
2404 func->numLocals = 1;
2405 func->numValues = 1;
2406 func->numConstants = 1;
2407 func->numBB = 1;
2408 func->returnType = 32;
2409 func->types = cli_calloc(1, sizeof(*func->types));
2410 if (!func->types) {
2411 cli_errmsg("Failed to allocate memory for types\n");
2412 return CL_EMEM;
2413 }
2414 func->types[0] = 32;
2415 func->BB = cli_calloc(1, sizeof(*func->BB));
2416 if (!func->BB) {
2417 cli_errmsg("Failed to allocate memory for BB\n");
2418 return CL_EMEM;
2419 }
2420 func->allinsts = cli_calloc(2, sizeof(*func->allinsts));
2421 if (!func->allinsts) {
2422 cli_errmsg("Failed to allocate memory for insts\n");
2423 return CL_EMEM;
2424 }
2425 func->BB->numInsts = 2;
2426 func->BB->insts = func->allinsts;
2427 func->constants = cli_calloc(1, sizeof(*func->constants));
2428 if (!func->constants) {
2429 cli_errmsg("Failed to allocate memory for constants\n");
2430 return CL_EMEM;
2431 }
2432 func->constants[0] = 0xf00d;
2433 inst = func->allinsts;
2434
2435 inst->opcode = OP_BC_CALL_API;
2436 inst->u.ops.numOps = 1;
2437 inst->u.ops.opsizes = NULL;
2438 inst->u.ops.ops = cli_calloc(1, sizeof(*inst->u.ops.ops));
2439 if (!inst->u.ops.ops) {
2440 cli_errmsg("Failed to allocate memory for instructions\n");
2441 return CL_EMEM;
2442 }
2443 inst->u.ops.ops[0] = 1;
2444 inst->u.ops.funcid = 18; /* test2 */
2445 inst->dest = 0;
2446 inst->type = 32;
2447 inst->interp_op = inst->opcode * 5 + 3;
2448
2449 inst = &func->allinsts[1];
2450 inst->opcode = OP_BC_RET;
2451 inst->type = 32;
2452 inst->u.unaryop = 0;
2453 inst->interp_op = inst->opcode * 5;
2454
2455 bc->state = bc_loaded;
2456 return 0;
2457 }
2458
run_selfcheck(struct cli_all_bc * bcs)2459 static int run_selfcheck(struct cli_all_bc *bcs)
2460 {
2461 struct cli_bc_ctx *ctx;
2462 struct cli_bc *bc = &bcs->all_bcs[bcs->count - 1];
2463 int rc;
2464 if (bc->state != bc_jit && bc->state != bc_interp) {
2465 cli_errmsg("Failed to prepare selfcheck bytecode\n");
2466 return CL_EBYTECODE;
2467 }
2468 ctx = cli_bytecode_context_alloc();
2469 if (!ctx) {
2470 cli_errmsg("Failed to allocate bytecode context\n");
2471 return CL_EMEM;
2472 }
2473 cli_bytecode_context_setfuncid(ctx, bc, 0);
2474
2475 cli_dbgmsg("bytecode self test running\n");
2476 ctx->bytecode_timeout = 0;
2477 rc = cli_bytecode_run(bcs, bc, ctx);
2478 cli_bytecode_context_destroy(ctx);
2479 if (rc != CL_SUCCESS) {
2480 cli_errmsg("bytecode self test failed: %s\n",
2481 cl_strerror(rc));
2482 } else {
2483 cli_dbgmsg("bytecode self test succeeded\n");
2484 }
2485 return rc;
2486 }
2487
selfcheck(int jit,struct cli_bcengine * engine)2488 static int selfcheck(int jit, struct cli_bcengine *engine)
2489 {
2490 struct cli_all_bc bcs;
2491 int rc;
2492
2493 memset(&bcs, 0, sizeof(bcs));
2494 bcs.all_bcs = NULL;
2495 bcs.count = 0;
2496 bcs.engine = engine;
2497 rc = add_selfcheck(&bcs);
2498 if (rc == CL_SUCCESS) {
2499 if (jit) {
2500 if (!bcs.engine) {
2501 cli_dbgmsg("bytecode: JIT disabled\n");
2502 rc = CL_BREAK; /* no JIT - not fatal */
2503 } else {
2504 rc = cli_bytecode_prepare_jit(&bcs);
2505 }
2506 } else {
2507 rc = cli_bytecode_prepare_interpreter(bcs.all_bcs);
2508 }
2509 if (rc == CL_SUCCESS)
2510 rc = run_selfcheck(&bcs);
2511 if (rc == CL_BREAK)
2512 rc = CL_SUCCESS;
2513 }
2514 cli_bytecode_destroy(bcs.all_bcs);
2515 free(bcs.all_bcs);
2516 cli_bytecode_done_jit(&bcs, 1);
2517 if (rc != CL_SUCCESS) {
2518 cli_errmsg("Bytecode: failed to run selfcheck in %s mode: %s\n",
2519 jit ? "JIT" : "interpreter", cl_strerror(rc));
2520 }
2521 return rc;
2522 }
2523
set_mode(struct cl_engine * engine,enum bytecode_mode mode)2524 static int set_mode(struct cl_engine *engine, enum bytecode_mode mode)
2525 {
2526 if (engine->bytecode_mode == mode)
2527 return 0;
2528 if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2529 cli_errmsg("bytecode: already turned off, can't turn it on again!\n");
2530 return -1;
2531 }
2532 cli_dbgmsg("Bytecode: mode changed to %d\n", mode);
2533 if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST) {
2534 if (mode == CL_BYTECODE_MODE_OFF || have_clamjit) {
2535 cli_errmsg("bytecode: in test mode but JIT/bytecode is about to be disabled: %d\n", mode);
2536 engine->bytecode_mode = mode;
2537 return -1;
2538 }
2539 return 0;
2540 }
2541 if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2542 cli_errmsg("bytecode: in JIT mode but JIT is about to be disabled: %d\n", mode);
2543 engine->bytecode_mode = mode;
2544 return -1;
2545 }
2546 engine->bytecode_mode = mode;
2547 return 0;
2548 }
2549
2550 /* runs the first bytecode of the specified kind, or the builtin one if no
2551 * bytecode of that kind is loaded */
run_builtin_or_loaded(struct cli_all_bc * bcs,uint8_t kind,const char * builtin_cbc,struct cli_bc_ctx * ctx,const char * desc)2552 static int run_builtin_or_loaded(struct cli_all_bc *bcs, uint8_t kind, const char *builtin_cbc, struct cli_bc_ctx *ctx, const char *desc)
2553 {
2554 unsigned i, builtin = 0, rc = 0;
2555 struct cli_bc *bc = NULL;
2556
2557 for (i = 0; i < bcs->count; i++) {
2558 bc = &bcs->all_bcs[i];
2559 if (bc->kind == kind)
2560 break;
2561 }
2562 if (i == bcs->count)
2563 bc = NULL;
2564 if (!bc) {
2565 /* no loaded bytecode found, load the builtin one! */
2566 struct cli_dbio dbio;
2567 bc = cli_calloc(1, sizeof(*bc));
2568 if (!bc) {
2569 cli_errmsg("Out of memory allocating bytecode\n");
2570 return CL_EMEM;
2571 }
2572 builtin = 1;
2573
2574 memset(&dbio, 0, sizeof(dbio));
2575 dbio.usebuf = 1;
2576 dbio.bufpt = dbio.buf = (char *)builtin_cbc;
2577 dbio.bufsize = strlen(builtin_cbc) + 1;
2578 if (!dbio.bufsize || dbio.bufpt[dbio.bufsize - 2] != '\n') {
2579 cli_errmsg("Invalid builtin bytecode: missing terminator\n");
2580 free(bc);
2581 return CL_EMALFDB;
2582 }
2583
2584 rc = cli_bytecode_load(bc, NULL, &dbio, 1, 0);
2585 if (rc) {
2586 cli_errmsg("Failed to load builtin %s bytecode\n", desc);
2587 free(bc);
2588 return rc;
2589 }
2590 }
2591 rc = cli_bytecode_prepare_interpreter(bc);
2592 if (rc) {
2593 cli_errmsg("Failed to prepare %s %s bytecode for interpreter: %s\n",
2594 builtin ? "builtin" : "loaded", desc, cl_strerror(rc));
2595 }
2596 if (bc->state != bc_interp) {
2597 cli_errmsg("Failed to prepare %s %s bytecode for interpreter\n",
2598 builtin ? "builtin" : "loaded", desc);
2599 rc = CL_EMALFDB;
2600 }
2601 if (!rc) {
2602 cli_bytecode_context_setfuncid(ctx, bc, 0);
2603 cli_dbgmsg("Bytecode: %s running (%s)\n", desc,
2604 builtin ? "builtin" : "loaded");
2605 rc = cli_bytecode_run(bcs, bc, ctx);
2606 }
2607 if (rc) {
2608 cli_errmsg("Failed to execute %s %s bytecode: %s\n", builtin ? "builtin" : "loaded",
2609 desc, cl_strerror(rc));
2610 }
2611 if (builtin) {
2612 cli_bytecode_destroy(bc);
2613 free(bc);
2614 }
2615 return rc;
2616 }
2617
cli_bytecode_prepare2(struct cl_engine * engine,struct cli_all_bc * bcs,unsigned dconfmask)2618 int cli_bytecode_prepare2(struct cl_engine *engine, struct cli_all_bc *bcs, unsigned dconfmask)
2619 {
2620 unsigned i, interp = 0, jitok = 0, jitcount = 0;
2621 int rc;
2622 struct cli_bc_ctx *ctx;
2623
2624 if (!bcs->count) {
2625 cli_dbgmsg("No bytecodes loaded, not running builtin test\n");
2626 return CL_SUCCESS;
2627 }
2628
2629 engine->bytecode_mode = CL_BYTECODE_MODE_AUTO;
2630 cli_detect_environment(&bcs->env);
2631 switch (bcs->env.arch) {
2632 case arch_i386:
2633 case arch_x86_64:
2634 if (!(dconfmask & BYTECODE_JIT_X86)) {
2635 cli_dbgmsg("Bytecode: disabled on X86 via DCONF\n");
2636 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2637 return CL_EBYTECODE_TESTFAIL;
2638 }
2639 break;
2640 case arch_ppc32:
2641 case arch_ppc64:
2642 if (!(dconfmask & BYTECODE_JIT_PPC)) {
2643 cli_dbgmsg("Bytecode: disabled on PPC via DCONF\n");
2644 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2645 return CL_EBYTECODE_TESTFAIL;
2646 }
2647 break;
2648 case arch_arm:
2649 if (!(dconfmask & BYTECODE_JIT_ARM)) {
2650 cli_dbgmsg("Bytecode: disabled on ARM via DCONF\n");
2651 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2652 return CL_EBYTECODE_TESTFAIL;
2653 }
2654 break;
2655 default:
2656 cli_dbgmsg("Bytecode: JIT not supported on this architecture, falling back\n");
2657 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2658 return CL_EBYTECODE_TESTFAIL;
2659 break;
2660 }
2661 cli_dbgmsg("Bytecode: mode is %d\n", engine->bytecode_mode);
2662
2663 ctx = cli_bytecode_context_alloc();
2664 if (!ctx) {
2665 cli_errmsg("Bytecode: failed to allocate bytecode context\n");
2666 return CL_EMEM;
2667 }
2668 rc = run_builtin_or_loaded(bcs, BC_STARTUP, builtin_bc_startup, ctx, "BC_STARTUP");
2669 if (rc != CL_SUCCESS) {
2670 cli_warnmsg("Bytecode: BC_STARTUP failed to run, disabling ALL bytecodes! Please report to https://github.com/Cisco-Talos/clamav/issues\n");
2671 ctx->bytecode_disable_status = 2;
2672 } else {
2673 cli_dbgmsg("Bytecode: disable status is %d\n", ctx->bytecode_disable_status);
2674 rc = cli_bytecode_context_getresult_int(ctx);
2675 /* check magic number, don't use 0 here because it is too easy for a
2676 * buggy bytecode to return 0 */
2677 if ((unsigned int)rc != (unsigned int)0xda7aba5e) {
2678 cli_warnmsg("Bytecode: selftest failed with code %08x. Please report to https://github.com/Cisco-Talos/clamav/issues\n",
2679 rc);
2680 if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
2681 return CL_EBYTECODE_TESTFAIL;
2682 }
2683 }
2684 switch (ctx->bytecode_disable_status) {
2685 case 1:
2686 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2687 return CL_EBYTECODE_TESTFAIL;
2688 break;
2689 case 2:
2690 if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2691 return CL_EBYTECODE_TESTFAIL;
2692 break;
2693 default:
2694 break;
2695 }
2696 cli_bytecode_context_destroy(ctx);
2697
2698 if (engine->bytecode_mode != CL_BYTECODE_MODE_INTERPRETER &&
2699 engine->bytecode_mode != CL_BYTECODE_MODE_OFF) {
2700 selfcheck(1, bcs->engine);
2701 rc = cli_bytecode_prepare_jit(bcs);
2702 if (rc == CL_SUCCESS) {
2703 jitok = 1;
2704 cli_dbgmsg("Bytecode: %u bytecode prepared with JIT\n", bcs->count);
2705 if (engine->bytecode_mode != CL_BYTECODE_MODE_TEST)
2706 return CL_SUCCESS;
2707 }
2708 if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2709 cli_errmsg("Bytecode: JIT required, but not all bytecodes could be prepared with JIT\n");
2710 return CL_EMALFDB;
2711 }
2712 if (rc && engine->bytecode_mode == CL_BYTECODE_MODE_TEST) {
2713 cli_errmsg("Bytecode: Test mode, but not all bytecodes could be prepared with JIT\n");
2714 return CL_EBYTECODE_TESTFAIL;
2715 }
2716 } else {
2717 cli_bytecode_done_jit(bcs, 0);
2718 }
2719
2720 if (!(dconfmask & BYTECODE_INTERPRETER)) {
2721 cli_dbgmsg("Bytecode: needs interpreter, but interpreter is disabled\n");
2722 if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2723 return CL_EBYTECODE_TESTFAIL;
2724 }
2725
2726 if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2727 for (i = 0; i < bcs->count; i++)
2728 bcs->all_bcs[i].state = bc_disabled;
2729 cli_dbgmsg("Bytecode: ALL bytecodes disabled\n");
2730 return CL_SUCCESS;
2731 }
2732
2733 for (i = 0; i < bcs->count; i++) {
2734 struct cli_bc *bc = &bcs->all_bcs[i];
2735 if (bc->state == bc_jit) {
2736 jitcount++;
2737 if (engine->bytecode_mode != CL_BYTECODE_MODE_TEST)
2738 continue;
2739 }
2740 if (bc->state == bc_interp) {
2741 interp++;
2742 continue;
2743 }
2744 rc = cli_bytecode_prepare_interpreter(bc);
2745 if (rc != CL_SUCCESS) {
2746 bc->state = bc_disabled;
2747 cli_warnmsg("Bytecode: %d failed to prepare for interpreter mode\n", bc->id);
2748 return rc;
2749 }
2750 interp++;
2751 }
2752 cli_dbgmsg("Bytecode: %u bytecode prepared with JIT, "
2753 "%u prepared with interpreter, %u total\n",
2754 jitcount, interp, bcs->count);
2755 return CL_SUCCESS;
2756 }
2757
cli_bytecode_init(struct cli_all_bc * allbc)2758 int cli_bytecode_init(struct cli_all_bc *allbc)
2759 {
2760 int ret;
2761 memset(allbc, 0, sizeof(*allbc));
2762 ret = cli_bytecode_init_jit(allbc, 0 /*XXX*/);
2763 cli_dbgmsg("Bytecode initialized in %s mode\n",
2764 allbc->engine ? "JIT" : "interpreter");
2765 allbc->inited = 1;
2766 return ret;
2767 }
2768
/*
 * Tear down the JIT state attached to `allbc` by delegating to
 * cli_bytecode_done_jit() with its second argument set to 0.
 */
int cli_bytecode_done(struct cli_all_bc *allbc)
{
    int status = cli_bytecode_done_jit(allbc, 0);

    return status;
}
2773
/*
 * Attach the file map `map` to the context: remember the map, copy its
 * length into ctx->file_size and point the filesize hook at that copy.
 * Always returns 0.
 */
int cli_bytecode_context_setfile(struct cli_bc_ctx *ctx, fmap_t *map)
{
    ctx->file_size      = map->len;
    ctx->fmap           = map;
    ctx->hooks.filesize = &ctx->file_size;

    return 0;
}
2781
/*
 * Run (or defer) the bytecode attached to a matched logical signature.
 *
 * cctx       - scan context
 * tinfo      - target info; when its status is 1 the executable details
 *              are passed on to the bytecode as PE hook data
 * bcs        - all loaded bytecodes
 * bc_idx     - 1-based index of the bytecode in bcs (0 is rejected)
 * lsigcnt    - match counts of the logical signature
 * lsigsuboff - match offsets of the logical signature
 * map        - file map being scanned
 *
 * Bytecodes that belong to a hook (hook_lsig_id != 0) are not run here;
 * only the match is recorded in cctx->hook_lsig_matches.
 *
 * Returns CL_ENULLARG for bc_idx 0, CL_VIRUS or the result of appending
 * the virus when the bytecode reported one, and CL_SUCCESS otherwise
 * (including when the bytecode failed to run, which is only logged).
 */
int cli_bytecode_runlsig(cli_ctx *cctx, struct cli_target_info *tinfo,
                         const struct cli_all_bc *bcs, unsigned bc_idx,
                         const uint32_t *lsigcnt,
                         const uint32_t *lsigsuboff, fmap_t *map)
{
    int ret;
    struct cli_bc_ctx ctx;
    const struct cli_bc *bc;
    struct cli_pe_hook_data pehookdata;

    if (bc_idx == 0)
        return CL_ENULLARG;

    /* Only form the element pointer once the index is known to be valid;
     * bc_idx is 1-based. */
    bc = &bcs->all_bcs[bc_idx - 1];

    memset(&ctx, 0, sizeof(ctx));
    cli_bytecode_context_setfuncid(&ctx, bc, 0);
    ctx.hooks.match_counts  = lsigcnt;
    ctx.hooks.match_offsets = lsigsuboff;
    cli_bytecode_context_setctx(&ctx, cctx);
    cli_bytecode_context_setfile(&ctx, map);
    if (tinfo && tinfo->status == 1) {
        ctx.sections = tinfo->exeinfo.sections;
        memset(&pehookdata, 0, sizeof(pehookdata));
        pehookdata.offset    = tinfo->exeinfo.offset;
        pehookdata.ep        = tinfo->exeinfo.ep;
        pehookdata.nsections = tinfo->exeinfo.nsections;
        pehookdata.hdr_size  = tinfo->exeinfo.hdr_size;
        ctx.hooks.pedata     = &pehookdata;
        ctx.resaddr          = tinfo->exeinfo.res_addr;
    }
    if (bc->hook_lsig_id) {
        cli_dbgmsg("hook lsig id %d matched (bc %d)\n", bc->hook_lsig_id, bc->id);
        /* this is a bytecode for a hook, defer running it until hook is
         * executed, so that it has all the info for the hook */
        if (cctx->hook_lsig_matches)
            cli_bitset_set(cctx->hook_lsig_matches, bc->hook_lsig_id - 1);
        /* save match counts */
        memcpy(&ctx.lsigcnt, lsigcnt, 64 * 4);
        memcpy(&ctx.lsigoff, lsigsuboff, 64 * 4);
        cli_bytecode_context_clear(&ctx);
        return CL_SUCCESS;
    }

    cli_dbgmsg("Running bytecode for logical signature match\n");
    ret = cli_bytecode_run(bcs, bc, &ctx);
    if (ret != CL_SUCCESS) {
        cli_warnmsg("Bytecode %u failed to run: %s\n", bc->id, cl_strerror(ret));
        cli_bytecode_context_clear(&ctx);
        return CL_SUCCESS;
    }
    if (ctx.virname) {
        if (cctx->num_viruses == 0) {
            int rc;
            cli_dbgmsg("Bytecode found virus: %s\n", ctx.virname);
            if (!strncmp(ctx.virname, "BC.Heuristics", 13))
                rc = cli_append_possibly_unwanted(cctx, ctx.virname);
            else
                rc = cli_append_virus(cctx, ctx.virname);
            cli_bytecode_context_clear(&ctx);
            return rc;
        } else {
            cli_bytecode_context_clear(&ctx);
            return CL_VIRUS;
        }
    }
    ret = cli_bytecode_context_getresult_int(&ctx);
    cli_dbgmsg("Bytecode %u returned code: %u\n", bc->id, (unsigned)ret);
    cli_bytecode_context_clear(&ctx);
    return CL_SUCCESS;
}
2851
/*
 * Run every bytecode registered for hook `id`.
 *
 * Bytecodes that carry a logical signature only run when that signature
 * was recorded as matched in cctx->hook_lsig_matches. For each executed
 * bytecode:
 *  - a run failure is logged and remembered;
 *  - a reported virus is appended to the scan context, and ends the hook
 *    with CL_VIRUS unless all-match scanning is enabled;
 *  - a result of 0xcea5e sets the BREAK flag;
 *  - a result of 0 with an output file makes that file get scanned, and a
 *    virus found there ends the hook unless all-match is enabled.
 *
 * Returns CL_ENULLARG when cctx is NULL, CL_VIRUS as described above,
 * CL_EBYTECODE_TESTFAIL when a bytecode failed and the engine is in test
 * mode, CL_BREAK when the BREAK flag was set, and CL_CLEAN otherwise.
 */
int cli_bytecode_runhook(cli_ctx *cctx, const struct cl_engine *engine, struct cli_bc_ctx *ctx,
                         unsigned id, fmap_t *map)
{
    const unsigned *hook_ids = engine->hooks[id - _BC_START_HOOKS];
    unsigned n_hooks         = engine->hooks_cnt[id - _BC_START_HOOKS];
    unsigned idx;
    unsigned executed = 0, breakflag = 0, errorflag = 0;
    int ret;

    if (!cctx)
        return CL_ENULLARG;

    cli_dbgmsg("Bytecode executing hook id %u (%u hooks)\n", id, n_hooks);
    /* restore match counts */
    cli_bytecode_context_setfile(ctx, map);
    ctx->hooks.match_counts  = ctx->lsigcnt;
    ctx->hooks.match_offsets = ctx->lsigoff;

    for (idx = 0; idx < n_hooks; idx++) {
        const struct cli_bc *bc = &engine->bcs.all_bcs[hook_ids[idx]];

        if (bc->lsig) {
            /* skip bytecodes whose logical signature did not match */
            if (!cctx->hook_lsig_matches ||
                !cli_bitset_test(cctx->hook_lsig_matches, bc->hook_lsig_id - 1))
                continue;
            cli_dbgmsg("Bytecode: executing bytecode %u (lsig matched)\n", bc->id);
        }

        cli_bytecode_context_setfuncid(ctx, bc, 0);
        ret = cli_bytecode_run(&engine->bcs, bc, ctx);
        executed++;
        if (ret != CL_SUCCESS) {
            cli_warnmsg("Bytecode %u failed to run: %s\n", bc->id, cl_strerror(ret));
            errorflag = 1;
            continue;
        }

        if (ctx->virname) {
            cli_dbgmsg("Bytecode runhook found virus: %s\n", ctx->virname);
            cli_append_virus(cctx, ctx->virname);
            if (!(cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES)) {
                cli_bytecode_context_clear(ctx);
                return CL_VIRUS;
            }
            cli_bytecode_context_reset(ctx);
            continue;
        }

        ret = cli_bytecode_context_getresult_int(ctx);
        /* TODO: use prefix here */
        cli_dbgmsg("Bytecode %u returned %u\n", bc->id, ret);
        if (ret == 0xcea5e) {
            cli_dbgmsg("Bytecode set BREAK flag in hook!\n");
            breakflag = 1;
        }

        if (!ret) {
            char *tempfile;
            int fd = cli_bytecode_context_getresult_file(ctx, &tempfile);

            /* both 0 and -1 mean there is no output file to scan */
            if (fd && fd != -1) {
                if (cctx->engine->keeptmp)
                    cli_dbgmsg("Bytecode %u unpacked file saved in %s\n",
                               bc->id, tempfile);
                else
                    cli_dbgmsg("Bytecode %u unpacked file\n", bc->id);
                lseek(fd, 0, SEEK_SET);
                cli_dbgmsg("***** Scanning unpacked file ******\n");

                ret = cli_magic_scan_desc(fd, tempfile, cctx, NULL);

                if (!cctx->engine->keeptmp && ftruncate(fd, 0) == -1)
                    cli_dbgmsg("ftruncate failed on %d\n", fd);
                close(fd);
                if (!cctx->engine->keeptmp && tempfile && cli_unlink(tempfile))
                    ret = CL_EUNLINK;
                free(tempfile);

                if (ret == CL_VIRUS) {
                    cli_dbgmsg("Scanning unpacked file by bytecode %u found a virus\n", bc->id);
                    if (!(cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES)) {
                        cli_bytecode_context_clear(ctx);
                        return ret;
                    }
                }
            }
        }

        /* get the context ready for the next bytecode */
        cli_bytecode_context_reset(ctx);
    }

    if (executed)
        cli_dbgmsg("Bytecode: executed %u bytecodes for this hook\n", executed);
    else
        cli_dbgmsg("Bytecode: no logical signature matched, no bytecode executed\n");

    if (errorflag && cctx->engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
        return CL_EBYTECODE_TESTFAIL;
    if (breakflag)
        return CL_BREAK;
    return CL_CLEAN;
}
2949
cli_bytecode_context_setpe(struct cli_bc_ctx * ctx,const struct cli_pe_hook_data * data,const struct cli_exe_section * sections)2950 int cli_bytecode_context_setpe(struct cli_bc_ctx *ctx, const struct cli_pe_hook_data *data, const struct cli_exe_section *sections)
2951 {
2952 ctx->sections = sections;
2953 ctx->hooks.pedata = data;
2954 return 0;
2955 }
2956
cli_bytecode_context_setctx(struct cli_bc_ctx * ctx,void * cctx)2957 void cli_bytecode_context_setctx(struct cli_bc_ctx *ctx, void *cctx)
2958 {
2959 ctx->ctx = cctx;
2960 ctx->bytecode_timeout = ((cli_ctx *)cctx)->engine->bytecode_timeout;
2961 }
2962
cli_bytecode_describe(const struct cli_bc * bc)2963 void cli_bytecode_describe(const struct cli_bc *bc)
2964 {
2965 char buf[128];
2966 int cols;
2967 unsigned i;
2968 time_t stamp;
2969 int had;
2970
2971 if (!bc) {
2972 printf("(null bytecode)\n");
2973 return;
2974 }
2975
2976 stamp = bc->metadata.timestamp;
2977 printf("Bytecode format functionality level: %u\n", bc->metadata.formatlevel);
2978 printf("Bytecode metadata:\n\tcompiler version: %s\n",
2979 bc->metadata.compiler ? bc->metadata.compiler : "N/A");
2980 printf("\tcompiled on: (%d) %s",
2981 (uint32_t)stamp,
2982 cli_ctime(&stamp, buf, sizeof(buf)));
2983 printf("\tcompiled by: %s\n", bc->metadata.sigmaker ? bc->metadata.sigmaker : "N/A");
2984 /*TODO: parse and display arch name, also take it into account when
2985 JITing*/
2986 printf("\ttarget exclude: %d\n", bc->metadata.targetExclude);
2987 printf("\tbytecode type: ");
2988 switch (bc->kind) {
2989 case BC_GENERIC:
2990 puts("generic, not loadable by clamscan/clamd");
2991 break;
2992 case BC_STARTUP:
2993 puts("run on startup (unique)");
2994 break;
2995 case BC_LOGICAL:
2996 puts("logical only");
2997 break;
2998 case BC_PE_UNPACKER:
2999 puts("PE unpacker hook");
3000 break;
3001 case BC_PE_ALL:
3002 puts("all PE hook");
3003 break;
3004 case BC_PRECLASS:
3005 puts("preclass hook");
3006 break;
3007 case BC_ELF_UNPACKER:
3008 puts("ELF unpacker hook");
3009 break;
3010 case BC_MACHO_UNPACKER:
3011 puts("Mach-O unpacker hook");
3012 break;
3013 default:
3014 printf("Unknown (type %u)", bc->kind);
3015 break;
3016 }
3017 /* 0 means no limit */
3018 printf("\tbytecode functionality level: %u - %u\n",
3019 bc->metadata.minfunc, bc->metadata.maxfunc);
3020 printf("\tbytecode logical signature: %s\n",
3021 bc->lsig ? bc->lsig : "<none>");
3022 printf("\tvirusname prefix: %s\n",
3023 bc->vnameprefix);
3024 printf("\tvirusnames: %u\n", bc->vnames_cnt);
3025 printf("\tbytecode triggered on: ");
3026 switch (bc->kind) {
3027 case BC_GENERIC:
3028 puts("N/A (loaded in clambc only)");
3029 break;
3030 case BC_LOGICAL:
3031 puts("files matching logical signature");
3032 break;
3033 case BC_PE_UNPACKER:
3034 if (bc->lsig)
3035 puts("PE files matching logical signature (unpacked)");
3036 else
3037 puts("all PE files! (unpacked)");
3038 break;
3039 case BC_PDF:
3040 puts("PDF files");
3041 break;
3042 case BC_PE_ALL:
3043 if (bc->lsig)
3044 puts("PE files matching logical signature");
3045 else
3046 puts("all PE files!");
3047 break;
3048 case BC_PRECLASS:
3049 if (bc->lsig)
3050 puts("PRECLASS files matching logical signature");
3051 else
3052 puts("all PRECLASS files!");
3053 break;
3054 case BC_ELF_UNPACKER:
3055 if (bc->lsig)
3056 puts("ELF files matching logical signature (unpacked)");
3057 else
3058 puts("all ELF files! (unpacked)");
3059 break;
3060 case BC_MACHO_UNPACKER:
3061 if (bc->lsig)
3062 puts("Mach-O files matching logical signature (unpacked)");
3063 else
3064 puts("all Mach-O files! (unpacked)");
3065 break;
3066 default:
3067 puts("N/A (unknown type)\n");
3068 break;
3069 }
3070 printf("\tnumber of functions: %u\n\tnumber of types: %u\n",
3071 bc->num_func, bc->num_types);
3072 printf("\tnumber of global constants: %u\n", (unsigned)bc->num_globals);
3073 printf("\tnumber of debug nodes: %u\n", bc->dbgnode_cnt);
3074 printf("\tbytecode APIs used:");
3075 cols = 0; /* remaining */
3076 had = 0;
3077 for (i = 0; i < cli_apicall_maxapi; i++) {
3078 if (cli_bitset_test(bc->uses_apis, i)) {
3079 unsigned len = strlen(cli_apicalls[i].name);
3080 if (had)
3081 printf(",");
3082 if (len > (unsigned int)cols) {
3083 printf("\n\t");
3084 cols = 72;
3085 }
3086 printf(" %s", cli_apicalls[i].name);
3087 had = 1;
3088 cols -= len;
3089 }
3090 }
3091 printf("\n");
3092 }
3093
/*
 * Printable names of the derived type kinds. cli_bytetype_describe() indexes
 * this table with the `kind` field of a bytecode type, so the order of the
 * entries must follow the numeric values of that field.
 */
const char *bc_tystr[] = {
    "DFunctionType",
    "DPointerType",
    "DStructType",
    "DPackedStructType",
    "DArrayType"};
3100
/*
 * Printable opcode names. cli_byteinst_describe() indexes this table directly
 * with inst->opcode (after rejecting values above OP_BC_INVALID), so entry N
 * must be the name of the opcode whose numeric value is N. Slot 0 is a
 * placeholder because the first real opcode, OP_BC_ADD, has the value 1.
 */
const char *bc_opstr[] = {
    "OP_BC_NULL",
    "OP_BC_ADD", /* =1*/
    "OP_BC_SUB",
    "OP_BC_MUL",
    "OP_BC_UDIV",
    "OP_BC_SDIV",
    "OP_BC_UREM",
    "OP_BC_SREM",
    "OP_BC_SHL",
    "OP_BC_LSHR",
    "OP_BC_ASHR",
    "OP_BC_AND",
    "OP_BC_OR",
    "OP_BC_XOR",

    "OP_BC_TRUNC",
    "OP_BC_SEXT",
    "OP_BC_ZEXT",

    "OP_BC_BRANCH",
    "OP_BC_JMP",
    "OP_BC_RET",
    "OP_BC_RET_VOID",

    "OP_BC_ICMP_EQ",
    "OP_BC_ICMP_NE",
    "OP_BC_ICMP_UGT",
    "OP_BC_ICMP_UGE",
    "OP_BC_ICMP_ULT",
    "OP_BC_ICMP_ULE",
    "OP_BC_ICMP_SGT",
    "OP_BC_ICMP_SGE",
    "OP_BC_ICMP_SLE",
    "OP_BC_ICMP_SLT",
    "OP_BC_SELECT",
    "OP_BC_CALL_DIRECT",
    "OP_BC_CALL_API",
    "OP_BC_COPY",
    "OP_BC_GEP1",
    "OP_BC_GEPZ",
    "OP_BC_GEPN",
    "OP_BC_STORE",
    "OP_BC_LOAD",
    "OP_BC_MEMSET",
    "OP_BC_MEMCPY",
    "OP_BC_MEMMOVE",
    "OP_BC_MEMCMP",
    "OP_BC_ISBIGENDIAN",
    "OP_BC_ABORT",
    "OP_BC_BSWAP16",
    "OP_BC_BSWAP32",
    "OP_BC_BSWAP64",
    "OP_BC_PTRDIFF32",
    "OP_BC_PTRTOINT64",
    "OP_BC_INVALID" /* last */
};
3158
3159 extern unsigned cli_numapicalls;
cli_bytetype_helper(const struct cli_bc * bc,unsigned tid)3160 static void cli_bytetype_helper(const struct cli_bc *bc, unsigned tid)
3161 {
3162 unsigned i, j;
3163 const struct cli_bc_type *ty;
3164
3165 if (tid & 0x8000) {
3166 printf("alloc ");
3167 tid &= 0x7fff;
3168 }
3169
3170 if (tid < 65) {
3171 printf("i%d", tid);
3172 return;
3173 }
3174
3175 i = tid - 65;
3176 if (i >= bc->num_types) {
3177 printf("invalid type");
3178 return;
3179 }
3180 ty = &bc->types[i];
3181
3182 switch (ty->kind) {
3183 case DFunctionType:
3184 cli_bytetype_helper(bc, ty->containedTypes[0]);
3185 printf(" func ( ");
3186 for (j = 1; j < ty->numElements; ++j) {
3187 cli_bytetype_helper(bc, ty->containedTypes[0]);
3188 printf(" ");
3189 }
3190 printf(")");
3191 break;
3192 case DPointerType:
3193 cli_bytetype_helper(bc, ty->containedTypes[0]);
3194 printf("*");
3195 break;
3196 case DStructType:
3197 case DPackedStructType:
3198 printf("{ ");
3199 for (j = 0; j < ty->numElements; ++j) {
3200 cli_bytetype_helper(bc, ty->containedTypes[0]);
3201 printf(" ");
3202 }
3203 printf("}");
3204 break;
3205 case DArrayType:
3206 printf("[");
3207 printf("%d x ", ty->numElements);
3208 cli_bytetype_helper(bc, ty->containedTypes[0]);
3209 printf("]");
3210 break;
3211 default:
3212 printf("unhandled type kind %d, cannot parse", ty->kind);
3213 break;
3214 }
3215 }
3216
cli_bytetype_describe(const struct cli_bc * bc)3217 void cli_bytetype_describe(const struct cli_bc *bc)
3218 {
3219 unsigned i, tid;
3220
3221 printf("found %d extra types of %d total, starting at tid %d\n",
3222 bc->num_types, 64 + bc->num_types, bc->start_tid);
3223
3224 printf("TID KIND INTERNAL\n");
3225 printf("------------------------------------------------------------------------\n");
3226 for (i = 0, tid = 65; i < bc->num_types - 1; ++i, ++tid) {
3227 printf("%3d: %-20s", tid, bc_tystr[bc->types[i].kind]);
3228 cli_bytetype_helper(bc, tid);
3229 printf("\n");
3230 }
3231 printf("------------------------------------------------------------------------\n");
3232 }
3233
cli_bytevalue_describe(const struct cli_bc * bc,unsigned funcid)3234 void cli_bytevalue_describe(const struct cli_bc *bc, unsigned funcid)
3235 {
3236 unsigned i, total = 0;
3237 const struct cli_bc_func *func;
3238
3239 if (funcid >= bc->num_func) {
3240 printf("bytecode diagnostic: funcid [%u] outside bytecode numfuncs [%u]\n",
3241 funcid, bc->num_func);
3242 return;
3243 }
3244 // globals
3245 printf("found a total of %zu globals\n", bc->num_globals);
3246 printf("GID ID VALUE\n");
3247 printf("------------------------------------------------------------------------\n");
3248 for (i = 0; i < bc->num_globals; ++i) {
3249 printf("%3u [%3u]: ", i, i);
3250 cli_bytetype_helper(bc, bc->globaltys[i]);
3251 printf(" unknown\n");
3252 }
3253 printf("------------------------------------------------------------------------\n");
3254
3255 // arguments and local values
3256 func = &bc->funcs[funcid];
3257 printf("found %d values with %d arguments and %d locals\n",
3258 func->numValues, func->numArgs, func->numLocals);
3259 printf("VID ID VALUE\n");
3260 printf("------------------------------------------------------------------------\n");
3261 for (i = 0; i < func->numValues; ++i) {
3262 printf("%3u [%3u]: ", i, total++);
3263 cli_bytetype_helper(bc, func->types[i]);
3264 if (i < func->numArgs)
3265 printf(" argument");
3266 printf("\n");
3267 }
3268 printf("------------------------------------------------------------------------\n");
3269
3270 // constants
3271 printf("found a total of %d constants\n", func->numConstants);
3272 printf("CID ID VALUE\n");
3273 printf("------------------------------------------------------------------------\n");
3274 for (i = 0; i < func->numConstants; ++i) {
3275 printf("%3u [%3u]: " STDu64 "(0x" STDx64 ")\n", i, total++, func->constants[i], func->constants[i]);
3276 }
3277 printf("------------------------------------------------------------------------\n");
3278 printf("found a total of %u total values\n", total);
3279 printf("------------------------------------------------------------------------\n");
3280 return;
3281 }
3282
cli_byteinst_describe(const struct cli_bc_inst * inst,unsigned * bbnum)3283 void cli_byteinst_describe(const struct cli_bc_inst *inst, unsigned *bbnum)
3284 {
3285 unsigned j;
3286 char inst_str[256];
3287 const struct cli_apicall *api;
3288
3289 if (inst->opcode > OP_BC_INVALID) {
3290 printf("opcode %u[%u] of type %u is not implemented yet!",
3291 inst->opcode, inst->interp_op / 5, inst->interp_op % 5);
3292 return;
3293 }
3294
3295 snprintf(inst_str, sizeof(inst_str), "%-20s[%-3d/%3d/%3d]", bc_opstr[inst->opcode],
3296 inst->opcode, inst->interp_op, inst->interp_op % inst->opcode);
3297 printf("%-35s", inst_str);
3298 switch (inst->opcode) {
3299 // binary operations
3300 case OP_BC_ADD:
3301 printf("%d = %d + %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3302 break;
3303 case OP_BC_SUB:
3304 printf("%d = %d - %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3305 break;
3306 case OP_BC_MUL:
3307 printf("%d = %d * %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3308 break;
3309 case OP_BC_UDIV:
3310 printf("%d = %d / %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3311 break;
3312 case OP_BC_SDIV:
3313 printf("%d = %d / %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3314 break;
3315 case OP_BC_UREM:
3316 printf("%d = %d %% %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3317 break;
3318 case OP_BC_SREM:
3319 printf("%d = %d %% %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3320 break;
3321 case OP_BC_SHL:
3322 printf("%d = %d << %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3323 break;
3324 case OP_BC_LSHR:
3325 printf("%d = %d >> %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3326 break;
3327 case OP_BC_ASHR:
3328 printf("%d = %d >> %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3329 break;
3330 case OP_BC_AND:
3331 printf("%d = %d & %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3332 break;
3333 case OP_BC_OR:
3334 printf("%d = %d | %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3335 break;
3336 case OP_BC_XOR:
3337 printf("%d = %d ^ %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3338 break;
3339
3340 // casting operations
3341 case OP_BC_TRUNC:
3342 printf("%d = %d trunc " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3343 break;
3344 case OP_BC_SEXT:
3345 printf("%d = %d sext " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3346 break;
3347 case OP_BC_ZEXT:
3348 printf("%d = %d zext " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3349 break;
3350
3351 // control operations (termination instructions)
3352 case OP_BC_BRANCH:
3353 printf("br %d ? bb.%d : bb.%d", inst->u.branch.condition,
3354 inst->u.branch.br_true, inst->u.branch.br_false);
3355 (*bbnum)++;
3356 break;
3357 case OP_BC_JMP:
3358 printf("jmp bb.%d", inst->u.jump);
3359 (*bbnum)++;
3360 break;
3361 case OP_BC_RET:
3362 printf("ret %d", inst->u.unaryop);
3363 (*bbnum)++;
3364 break;
3365 case OP_BC_RET_VOID:
3366 printf("ret void");
3367 (*bbnum)++;
3368 break;
3369
3370 // comparison operations
3371 case OP_BC_ICMP_EQ:
3372 printf("%d = (%d == %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3373 break;
3374 case OP_BC_ICMP_NE:
3375 printf("%d = (%d != %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3376 break;
3377 case OP_BC_ICMP_UGT:
3378 printf("%d = (%d > %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3379 break;
3380 case OP_BC_ICMP_UGE:
3381 printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3382 break;
3383 case OP_BC_ICMP_ULT:
3384 printf("%d = (%d < %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3385 break;
3386 case OP_BC_ICMP_ULE:
3387 printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3388 break;
3389 case OP_BC_ICMP_SGT:
3390 printf("%d = (%d > %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3391 break;
3392 case OP_BC_ICMP_SGE:
3393 printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3394 break;
3395 case OP_BC_ICMP_SLE:
3396 printf("%d = (%d <= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3397 break;
3398 case OP_BC_ICMP_SLT:
3399 printf("%d = (%d < %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3400 break;
3401 case OP_BC_SELECT:
3402 printf("%d = %d ? %d : %d)", inst->dest, inst->u.three[0],
3403 inst->u.three[1], inst->u.three[2]);
3404 break;
3405
3406 // function calling
3407 case OP_BC_CALL_DIRECT:
3408 printf("%d = call F.%d (", inst->dest, inst->u.ops.funcid);
3409 for (j = 0; j < inst->u.ops.numOps; ++j) {
3410 if (j == inst->u.ops.numOps - 1) {
3411 printf("%d", inst->u.ops.ops[j]);
3412 } else {
3413 printf("%d, ", inst->u.ops.ops[j]);
3414 }
3415 }
3416 printf(")");
3417 break;
3418 case OP_BC_CALL_API: {
3419 if (inst->u.ops.funcid > cli_numapicalls) {
3420 printf("apicall FID %d not yet implemented!\n", inst->u.ops.funcid);
3421 break;
3422 }
3423 api = &cli_apicalls[inst->u.ops.funcid];
3424 switch (api->kind) {
3425 case 0:
3426 printf("%d = %s[%d] (%d, %d)", inst->dest, api->name,
3427 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3428 break;
3429 case 1:
3430 printf("%d = %s[%d] (p.%d, %d)", inst->dest, api->name,
3431 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3432 break;
3433 case 2:
3434 printf("%d = %s[%d] (%d)", inst->dest, api->name,
3435 inst->u.ops.funcid, inst->u.ops.ops[0]);
3436 break;
3437 case 3:
3438 printf("p.%d = %s[%d] (%d)", inst->dest, api->name,
3439 inst->u.ops.funcid, inst->u.ops.ops[0]);
3440 break;
3441 case 4:
3442 printf("%d = %s[%d] (p.%d, %d, %d, %d, %d)", inst->dest, api->name,
3443 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3444 inst->u.ops.ops[2], inst->u.ops.ops[3], inst->u.ops.ops[4]);
3445 break;
3446 case 5:
3447 printf("%d = %s[%d] ()", inst->dest, api->name,
3448 inst->u.ops.funcid);
3449 break;
3450 case 6:
3451 printf("p.%d = %s[%d] (%d, %d)", inst->dest, api->name,
3452 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3453 break;
3454 case 7:
3455 printf("%d = %s[%d] (%d, %d, %d)", inst->dest, api->name,
3456 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3457 inst->u.ops.ops[2]);
3458 break;
3459 case 8:
3460 printf("%d = %s[%d] (p.%d, %d, p.%d, %d)", inst->dest, api->name,
3461 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3462 inst->u.ops.ops[2], inst->u.ops.ops[3]);
3463 break;
3464 case 9:
3465 printf("%d = %s[%d] (p.%d, %d, %d)", inst->dest, api->name,
3466 inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3467 inst->u.ops.ops[2]);
3468 break;
3469 default:
3470 printf("type %u apicalls not yet implemented!\n", api->kind);
3471 break;
3472 }
3473 } break;
3474
3475 // memory operations
3476 case OP_BC_COPY:
3477 printf("cp %d -> %d", inst->u.binop[0], inst->u.binop[1]);
3478 break;
3479 case OP_BC_GEP1:
3480 printf("%d = gep1 p.%d + (%d * %d)", inst->dest, inst->u.three[1],
3481 inst->u.three[2], inst->u.three[0]);
3482 break;
3483 case OP_BC_GEPZ:
3484 printf("%d = gepz p.%d + (%d)", inst->dest,
3485 inst->u.three[1], inst->u.three[2]);
3486 break;
3487 case OP_BC_GEPN:
3488 printf("illegal opcode, impossible");
3489 break;
3490 case OP_BC_STORE:
3491 printf("store %d -> p.%d", inst->u.binop[0], inst->u.binop[1]);
3492 break;
3493 case OP_BC_LOAD:
3494 printf("load %d <- p.%d", inst->dest, inst->u.unaryop);
3495 break;
3496
3497 // llvm intrinsics
3498 case OP_BC_MEMSET:
3499 printf("%d = memset (p.%d, %d, %d)", inst->dest, inst->u.three[0],
3500 inst->u.three[1], inst->u.three[2]);
3501 break;
3502 case OP_BC_MEMCPY:
3503 printf("%d = memcpy (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3504 inst->u.three[1], inst->u.three[2]);
3505 break;
3506 case OP_BC_MEMMOVE:
3507 printf("%d = memmove (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3508 inst->u.three[1], inst->u.three[2]);
3509 break;
3510 case OP_BC_MEMCMP:
3511 printf("%d = memcmp (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3512 inst->u.three[1], inst->u.three[2]);
3513 break;
3514
3515 // utility operations
3516 case OP_BC_ISBIGENDIAN:
3517 printf("%d = isbigendian()", inst->dest);
3518 break;
3519 case OP_BC_ABORT:
3520 printf("ABORT!!");
3521 break;
3522 case OP_BC_BSWAP16:
3523 printf("%d = bswap16 %d", inst->dest, inst->u.unaryop);
3524 break;
3525 case OP_BC_BSWAP32:
3526 printf("%d = bswap32 %d", inst->dest, inst->u.unaryop);
3527 break;
3528 case OP_BC_BSWAP64:
3529 printf("%d = bswap64 %d", inst->dest, inst->u.unaryop);
3530 break;
3531 case OP_BC_PTRDIFF32:
3532 printf("%d = ptrdiff32 p.%d p.%d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3533 break;
3534 case OP_BC_PTRTOINT64:
3535 printf("%d = ptrtoint64 p.%d", inst->dest, inst->u.unaryop);
3536 break;
3537 case OP_BC_INVALID: /* last */
3538 printf("INVALID!!");
3539 break;
3540
3541 default:
3542 // redundant check
3543 printf("opcode %u[%u] of type %u is not implemented yet!",
3544 inst->opcode, inst->interp_op / 5, inst->interp_op % 5);
3545 break;
3546 }
3547 }
3548
cli_bytefunc_describe(const struct cli_bc * bc,unsigned funcid)3549 void cli_bytefunc_describe(const struct cli_bc *bc, unsigned funcid)
3550 {
3551 unsigned i, bbnum, bbpre;
3552 const struct cli_bc_func *func;
3553
3554 if (funcid >= bc->num_func) {
3555 printf("bytecode diagnostic: funcid [%u] outside bytecode numfuncs [%u]\n",
3556 funcid, bc->num_func);
3557 return;
3558 }
3559
3560 func = &bc->funcs[funcid];
3561
3562 printf("FUNCTION ID: F.%d -> NUMINSTS %d\n", funcid, func->numInsts);
3563 printf("BB IDX OPCODE [ID /IID/MOD] INST\n");
3564 printf("------------------------------------------------------------------------\n");
3565 bbpre = 0;
3566 bbnum = 0;
3567 for (i = 0; i < func->numInsts; ++i) {
3568 if (bbpre != bbnum) {
3569 printf("\n");
3570 bbpre = bbnum;
3571 }
3572
3573 printf("%3d %3d ", bbnum, i);
3574 cli_byteinst_describe(&func->allinsts[i], &bbnum);
3575 printf("\n");
3576 }
3577 printf("------------------------------------------------------------------------\n");
3578 }
3579