1 /*
2  *  Load, and verify ClamAV bytecode.
3  *
4  *  Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
5  *  Copyright (C) 2009-2013 Sourcefire, Inc.
6  *
7  *  Authors: Török Edvin
8  *
9  *  This program is free software; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License version 2 as
11  *  published by the Free Software Foundation.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program; if not, write to the Free Software
20  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
21  *  MA 02110-1301, USA.
22  */
23 
24 #if HAVE_CONFIG_H
25 #include "clamav-config.h"
26 #endif
27 
28 #include <string.h>
29 #include <assert.h>
30 #include <fcntl.h>
31 
32 #if HAVE_JSON
33 #include "json.h"
34 #endif
35 
36 #include "dconf.h"
37 #include "clamav.h"
38 #include "others.h"
39 #include "pe.h"
40 #include "bytecode.h"
41 #include "bytecode_priv.h"
42 #include "bytecode_detect.h"
43 #include "readdb.h"
44 #include "scanners.h"
45 #include "bytecode_api.h"
46 #include "bytecode_api_impl.h"
47 #include "builtin_bytecodes.h"
48 
/* Default for MAX_TRACKED_BC when the build does not already define it. */
#ifndef MAX_TRACKED_BC
#define MAX_TRACKED_BC 64
#endif
#define BC_EVENTS_PER_SIG 2
/* Parenthesized so the product behaves as a single operand wherever the macro
 * is expanded (e.g. `x / MAX_BC_SIGEVENT_ID` or `x % MAX_BC_SIGEVENT_ID`). */
#define MAX_BC_SIGEVENT_ID (MAX_TRACKED_BC * BC_EVENTS_PER_SIG)
54 
55 cli_events_t *g_sigevents = NULL;
56 unsigned int g_sigid;
57 
58 /* dummy values */
59 static const uint32_t nomatch[64] = {
60     0xdeadbeef, 0xdeaddead, 0xbeefdead, 0xdeaddead, 0xdeadbeef, 0, 0, 0,
61     0, 0, 0, 0, 0, 0, 0, 0,
62     0, 0, 0, 0, 0, 0, 0, 0,
63     0, 0, 0, 0, 0, 0, 0, 0,
64     0, 0, 0, 0, 0, 0, 0, 0,
65     0, 0, 0, 0, 0, 0, 0, 0,
66     0, 0, 0, 0, 0, 0, 0, 0,
67     0, 0, 0, 0, 0, 0, 0, 0};
68 static const uint32_t nooffsets[64] = {
69     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
70     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
71     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
72     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
73     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
74     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
75     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
76     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
77     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
78     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
79     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
80     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
81     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
82     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
83     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE,
84     CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE, CLI_OFF_NONE};
85 
86 static const uint16_t nokind;
87 static const uint32_t nofilesize;
88 static const struct cli_pe_hook_data nopedata;
89 
context_safe(struct cli_bc_ctx * ctx)90 static void context_safe(struct cli_bc_ctx *ctx)
91 {
92     /* make sure these are never NULL */
93     if (!ctx->hooks.kind)
94         ctx->hooks.kind = &nokind;
95     if (!ctx->hooks.match_counts)
96         ctx->hooks.match_counts = nomatch;
97     if (!ctx->hooks.match_offsets)
98         ctx->hooks.match_offsets = nooffsets;
99     if (!ctx->hooks.filesize)
100         ctx->hooks.filesize = &nofilesize;
101     if (!ctx->hooks.pedata)
102         ctx->hooks.pedata = &nopedata;
103 }
104 
105 static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx);
cli_bytecode_context_alloc(void)106 struct cli_bc_ctx *cli_bytecode_context_alloc(void)
107 {
108     struct cli_bc_ctx *ctx = cli_calloc(1, sizeof(*ctx));
109     if (!ctx) {
110         cli_errmsg("Out of memory allocating cli_bytecode_context_reset\n");
111         return NULL;
112     }
113     ctx->bytecode_timeout = 60000;
114     cli_bytecode_context_reset(ctx);
115     return ctx;
116 }
117 
/*
 * Release everything owned by ctx (via cli_bytecode_context_clear()) and then
 * free ctx itself.
 *
 * A NULL ctx is accepted and ignored, mirroring free(NULL): the clear/reset
 * path dereferences ctx unconditionally, and cli_bytecode_context_alloc() can
 * return NULL.
 */
void cli_bytecode_context_destroy(struct cli_bc_ctx *ctx)
{
    if (!ctx)
        return;
    cli_bytecode_context_clear(ctx);
    free(ctx);
}
123 
cli_bytecode_context_getresult_file(struct cli_bc_ctx * ctx,char ** tempfilename)124 int cli_bytecode_context_getresult_file(struct cli_bc_ctx *ctx, char **tempfilename)
125 {
126     int fd;
127     *tempfilename = ctx->tempfile;
128     fd            = ctx->outfd;
129     ctx->tempfile = NULL;
130     ctx->outfd    = 0;
131     return fd;
132 }
133 
134 /* resets bytecode state, so you can run another bytecode with same ctx */
cli_bytecode_context_reset(struct cli_bc_ctx * ctx)135 static int cli_bytecode_context_reset(struct cli_bc_ctx *ctx)
136 {
137     unsigned i;
138 
139     free(ctx->opsizes);
140     ctx->opsizes = NULL;
141 
142     free(ctx->values);
143     ctx->values = NULL;
144 
145     free(ctx->operands);
146     ctx->operands = NULL;
147 
148     if (ctx->outfd) {
149         cli_ctx *cctx = ctx->ctx;
150         if (ctx->outfd)
151             close(ctx->outfd);
152         if (ctx->tempfile && (!cctx || !cctx->engine->keeptmp)) {
153             cli_unlink(ctx->tempfile);
154         }
155         free(ctx->tempfile);
156         ctx->tempfile = NULL;
157         ctx->outfd    = 0;
158     }
159     if (ctx->jsnormdir) {
160         char fullname[1025];
161         cli_ctx *cctx = ctx->ctx;
162         int fd, ret = CL_CLEAN;
163 
164         if (!ctx->found) {
165             snprintf(fullname, 1024, "%s" PATHSEP "javascript", ctx->jsnormdir);
166             fd = open(fullname, O_RDONLY | O_BINARY);
167             if (fd >= 0) {
168                 cctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
169 
170                 ret = cli_scan_desc(fd, cctx, CL_TYPE_HTML, 0, NULL, AC_SCAN_VIR, NULL, NULL);
171                 if (ret == CL_CLEAN) {
172                     if (lseek(fd, 0, SEEK_SET) == -1)
173                         cli_dbgmsg("cli_bytecode: call to lseek() has failed\n");
174                     else {
175                         cctx->next_layer_is_normalized = true; // This flag ingested by cli_recursion_stack_push().
176 
177                         ret = cli_scan_desc(fd, cctx, CL_TYPE_TEXT_ASCII, 0, NULL, AC_SCAN_VIR, NULL, NULL);
178                     }
179                 }
180                 close(fd);
181             }
182         }
183         if (!cctx || !cctx->engine->keeptmp) {
184             cli_rmdirs(ctx->jsnormdir);
185         }
186         free(ctx->jsnormdir);
187         if (ret != CL_CLEAN)
188             ctx->found = 1;
189     }
190     ctx->numParams = 0;
191     ctx->funcid    = 0;
192     /* don't touch fmap, file_size, and hooks, sections, ctx, timeout, pdf* */
193     ctx->off           = 0;
194     ctx->written       = 0;
195     ctx->jsnormwritten = 0;
196 #if USE_MPOOL
197     if (ctx->mpool) {
198         mpool_destroy(ctx->mpool);
199         ctx->mpool = NULL;
200     }
201 #else
202     /*TODO: implement for no-mmap case too*/
203 #endif
204     for (i = 0; i < ctx->ninflates; i++)
205         cli_bcapi_inflate_done(ctx, i);
206     free(ctx->inflates);
207     ctx->inflates  = NULL;
208     ctx->ninflates = 0;
209 
210     for (i = 0; i < ctx->nlzmas; i++)
211         cli_bcapi_lzma_done(ctx, i);
212     free(ctx->lzmas);
213     ctx->lzmas  = NULL;
214     ctx->nlzmas = 0;
215 
216 #if HAVE_BZLIB_H
217     for (i = 0; i < ctx->nbzip2s; i++)
218         cli_bcapi_bzip2_done(ctx, i);
219     free(ctx->bzip2s);
220     ctx->bzip2s  = NULL;
221     ctx->nbzip2s = 0;
222 #endif
223 
224     for (i = 0; i < ctx->nbuffers; i++)
225         cli_bcapi_buffer_pipe_done(ctx, i);
226     free(ctx->buffers);
227     ctx->buffers  = NULL;
228     ctx->nbuffers = 0;
229 
230     for (i = 0; i < ctx->nhashsets; i++)
231         cli_bcapi_hashset_done(ctx, i);
232     free(ctx->hashsets);
233     ctx->hashsets  = NULL;
234     ctx->nhashsets = 0;
235 
236     for (i = 0; i < ctx->njsnorms; i++)
237         cli_bcapi_jsnorm_done(ctx, i);
238     free(ctx->jsnorms);
239     ctx->jsnorms   = NULL;
240     ctx->njsnorms  = 0;
241     ctx->jsnormdir = NULL;
242 
243     for (i = 0; i < ctx->nmaps; i++)
244         cli_bcapi_map_done(ctx, i);
245     free(ctx->maps);
246     ctx->maps  = NULL;
247     ctx->nmaps = 0;
248 
249     /* Use input_switch() to free the extracted file fmap, if one exists */
250     cli_bcapi_input_switch(ctx, 0);
251 
252 #if HAVE_JSON
253     free((json_object **)(ctx->jsonobjs));
254     ctx->jsonobjs  = NULL;
255     ctx->njsonobjs = 0;
256 #endif
257 
258     ctx->containertype = CL_TYPE_ANY;
259     return CL_SUCCESS;
260 }
261 
cli_bytecode_context_clear(struct cli_bc_ctx * ctx)262 int cli_bytecode_context_clear(struct cli_bc_ctx *ctx)
263 {
264     cli_bytecode_context_reset(ctx);
265     memset(ctx, 0, sizeof(*ctx));
266     return CL_SUCCESS;
267 }
268 
typesize(const struct cli_bc * bc,uint16_t type)269 static unsigned typesize(const struct cli_bc *bc, uint16_t type)
270 {
271     struct cli_bc_type *ty;
272     unsigned j;
273 
274     type &= 0x7fff;
275     if (!type)
276         return 0;
277     if (type <= 8)
278         return 1;
279     if (type <= 16)
280         return 2;
281     if (type <= 32)
282         return 4;
283     if (type <= 64)
284         return 8;
285     ty = &bc->types[type - 65];
286     if (ty->size)
287         return ty->size;
288     switch (ty->kind) {
289         case 2:
290         case 3:
291             for (j = 0; j < ty->numElements; j++)
292                 ty->size += typesize(bc, ty->containedTypes[j]);
293             break;
294         case 4:
295             ty->size = ty->numElements * typesize(bc, ty->containedTypes[0]);
296             break;
297         default:
298             break;
299     }
300     if (!ty->size && ty->kind != DFunctionType) {
301         cli_warnmsg("type %d size is 0\n", type - 65);
302     }
303     return ty->size;
304 }
305 
typealign(const struct cli_bc * bc,uint16_t type)306 static unsigned typealign(const struct cli_bc *bc, uint16_t type)
307 {
308     type &= 0x7fff;
309     if (type <= 64) {
310         unsigned size = typesize(bc, type);
311         return size ? size : 1;
312     }
313     return bc->types[type - 65].align;
314 }
315 
cli_bytecode_context_setfuncid(struct cli_bc_ctx * ctx,const struct cli_bc * bc,unsigned funcid)316 int cli_bytecode_context_setfuncid(struct cli_bc_ctx *ctx, const struct cli_bc *bc, unsigned funcid)
317 {
318     unsigned i, s = 0;
319     const struct cli_bc_func *func;
320     if (funcid >= bc->num_func) {
321         cli_errmsg("bytecode: function ID doesn't exist: %u\n", funcid);
322         return CL_EARG;
323     }
324     func = ctx->func = &bc->funcs[funcid];
325     ctx->bc          = bc;
326     ctx->numParams   = func->numArgs;
327     ctx->funcid      = funcid;
328     if (func->numArgs) {
329         ctx->operands = cli_malloc(sizeof(*ctx->operands) * func->numArgs);
330         if (!ctx->operands) {
331             cli_errmsg("bytecode: error allocating memory for parameters\n");
332             return CL_EMEM;
333         }
334         ctx->opsizes = cli_malloc(sizeof(*ctx->opsizes) * func->numArgs);
335         if (!ctx->opsizes) {
336             cli_errmsg("bytecode: error allocating memory for opsizes\n");
337             return CL_EMEM;
338         }
339         for (i = 0; i < func->numArgs; i++) {
340             unsigned al          = typealign(bc, func->types[i]);
341             s                    = (s + al - 1) & ~(al - 1);
342             ctx->operands[i]     = s;
343             s += ctx->opsizes[i] = typesize(bc, func->types[i]);
344         }
345     }
346     s += 8; /* return value */
347     ctx->bytes  = s;
348     ctx->values = cli_malloc(s);
349     if (!ctx->values) {
350         cli_errmsg("bytecode: error allocating memory for parameters\n");
351         return CL_EMEM;
352     }
353     return CL_SUCCESS;
354 }
355 
/* Returns 1 when `type` is in the range 1..64, otherwise 0. */
static inline int type_isint(uint16_t type)
{
    if (type == 0)
        return 0;
    return type <= 64;
}
360 
cli_bytecode_context_setparam_int(struct cli_bc_ctx * ctx,unsigned i,uint64_t c)361 int cli_bytecode_context_setparam_int(struct cli_bc_ctx *ctx, unsigned i, uint64_t c)
362 {
363     if (i >= ctx->numParams) {
364         cli_errmsg("bytecode: param index out of bounds: %u\n", i);
365         return CL_EARG;
366     }
367     if (!type_isint(ctx->func->types[i])) {
368         cli_errmsg("bytecode: parameter type mismatch\n");
369         return CL_EARG;
370     }
371     switch (ctx->opsizes[i]) {
372         case 1:
373             ctx->values[ctx->operands[i]] = c;
374             break;
375         case 2:
376             *(uint16_t *)&ctx->values[ctx->operands[i]] = c;
377             break;
378         case 4:
379             *(uint32_t *)&ctx->values[ctx->operands[i]] = c;
380             break;
381         case 8:
382             *(uint64_t *)&ctx->values[ctx->operands[i]] = c;
383             break;
384     }
385     return CL_SUCCESS;
386 }
387 
cli_bytecode_context_setparam_ptr(struct cli_bc_ctx * ctx,unsigned i,void * data,unsigned datalen)388 int cli_bytecode_context_setparam_ptr(struct cli_bc_ctx *ctx, unsigned i, void *data, unsigned datalen)
389 {
390     UNUSEDPARAM(ctx);
391     UNUSEDPARAM(i);
392     UNUSEDPARAM(data);
393     UNUSEDPARAM(datalen);
394     cli_errmsg("Pointer parameters are not implemented yet!\n");
395     return CL_EARG;
396 }
397 
/*
 * Decode a variable-length number from p at *off.
 *
 * The first byte is 0x60 + the digit count (0..16).  Each following byte must
 * have 0x6 as its high nibble and contributes its low nibble, least
 * significant nibble first.
 *
 * On success *off is advanced past the number and the value is returned.
 * On failure (bad count byte, number running past len, or a bad digit byte)
 * an error is logged, *ok is set to 0, *off is left unchanged and 0 is
 * returned.
 */
static inline uint64_t readNumber(const unsigned char *p, unsigned *off, unsigned len, char *ok)
{
    const unsigned start   = *off;
    const unsigned marker  = p[start];
    const unsigned ndigits = marker - 0x60;
    uint64_t value         = 0;
    unsigned end, pos, shift;

    if (ndigits > 0x10) {
        cli_errmsg("Invalid number type: %c\n", marker);
        *ok = 0;
        return 0;
    }

    end = start + ndigits + 1;
    if (end > len) {
        cli_errmsg("End of line encountered while reading number\n");
        *ok = 0;
        return 0;
    }

    /* zero digits: the value is 0 */
    if (marker == 0x60) {
        *off = end;
        return 0;
    }

    for (pos = start + 1, shift = 0; pos < end; pos++, shift += 4) {
        uint64_t digit = p[pos];

        if (UNLIKELY((digit & 0xf0) != 0x60)) {
            cli_errmsg("Invalid number part: %c\n", (char)digit);
            *ok = 0;
            return 0;
        }
        value |= (digit & 0xf) << shift;
    }

    *off = end;
    return value;
}
436 
readFuncID(struct cli_bc * bc,unsigned char * p,unsigned * off,unsigned len,char * ok)437 static inline funcid_t readFuncID(struct cli_bc *bc, unsigned char *p,
438                                   unsigned *off, unsigned len, char *ok)
439 {
440     funcid_t id = readNumber(p, off, len, ok) - 1;
441     if (*ok && id >= bc->num_func) {
442         cli_errmsg("Called function out of range: %u >= %u\n", id, bc->num_func);
443         *ok = 0;
444         return ~0;
445     }
446     return id;
447 }
448 
readAPIFuncID(struct cli_bc * bc,unsigned char * p,unsigned * off,unsigned len,char * ok)449 static inline funcid_t readAPIFuncID(struct cli_bc *bc, unsigned char *p,
450                                      unsigned *off, unsigned len, char *ok)
451 {
452     funcid_t id = readNumber(p, off, len, ok) - 1;
453     if (*ok && !cli_bitset_test(bc->uses_apis, id)) {
454         cli_errmsg("Called undeclared API function: %u\n", id);
455         *ok = 0;
456         return ~0;
457     }
458     return id;
459 }
460 
/*
 * Decode exactly `width` digit bytes from p at *off, with no length prefix.
 *
 * Each byte must have 0x6 as its high nibble; low nibbles are combined least
 * significant first.  On success *off is advanced by `width`.  On failure an
 * error is logged, *ok is set to 0, *off is left unchanged and 0 is returned.
 */
static inline unsigned readFixedNumber(const unsigned char *p, unsigned *off,
                                       unsigned len, char *ok, unsigned width)
{
    const unsigned end = *off + width;
    unsigned value     = 0;
    unsigned shift     = 0;
    unsigned pos;

    if (end > len) {
        cli_errmsg("Newline encountered while reading number\n");
        *ok = 0;
        return 0;
    }

    for (pos = *off; pos < end; pos++) {
        unsigned digit = p[pos];

        if (UNLIKELY((digit & 0xf0) != 0x60)) {
            cli_errmsg("Invalid number part: %c\n", digit);
            *ok = 0;
            return 0;
        }
        value |= (digit & 0xf) << shift;
        shift += 4;
    }

    *off = end;
    return value;
}
486 
readOperand(struct cli_bc_func * func,unsigned char * p,unsigned * off,unsigned len,char * ok)487 static inline operand_t readOperand(struct cli_bc_func *func, unsigned char *p,
488                                     unsigned *off, unsigned len, char *ok)
489 {
490     uint64_t v;
491     if ((p[*off] & 0xf0) == 0x40 || p[*off] == 0x50) {
492         uint64_t *dest;
493         uint16_t ty;
494         p[*off] |= 0x20;
495         /* TODO: unique constants */
496         func->constants = cli_realloc2(func->constants, (func->numConstants + 1) * sizeof(*func->constants));
497         if (!func->constants) {
498             *ok = 0;
499             return MAX_OP;
500         }
501         v    = readNumber(p, off, len, ok);
502         dest = &func->constants[func->numConstants];
503         /* Write the constant to the correct place according to its type.
504 	 * This is needed on big-endian machines, because constants are always
505 	 * read as u64, but accessed as one of these types: u8, u16, u32, u64 */
506         *dest = 0;
507         ty    = 8 * readFixedNumber(p, off, len, ok, 1);
508         if (!ty) {
509             /* This is a global variable */
510             return 0x80000000 | v;
511         }
512         if (ty <= 8)
513             *(uint8_t *)dest = v;
514         else if (ty <= 16)
515             *(uint16_t *)dest = v;
516         else if (ty <= 32)
517             *(uint32_t *)dest = v;
518         else
519             *dest = v;
520         return func->numValues + func->numConstants++;
521     }
522     v = readNumber(p, off, len, ok);
523     if (!*ok)
524         return MAX_OP;
525     if (v >= func->numValues) {
526         cli_errmsg("Operand index exceeds bounds: %u >= %u!\n", (unsigned)v, (unsigned)func->numValues);
527         *ok = 0;
528         return MAX_OP;
529     }
530     return v;
531 }
532 
/*
 * Read a length-prefixed data blob from p at *off.
 *
 * Format: a '|' marker, the byte count as a readNumber() number, then two
 * digit bytes per data byte (low nibble first, each with 0x6 as its high
 * nibble).
 *
 * On success returns a newly allocated buffer of *datalen bytes that the
 * caller must free(), and advances *off past the data.  Returns NULL with
 * *datalen = 0 when the blob is empty or when *ok is already/has become 0.
 * On a malformed blob or allocation failure an error is logged, *ok is set to
 * 0 and NULL is returned.
 */
static inline char *readData(const unsigned char *p, unsigned *off, unsigned len, char *ok, unsigned *datalen)
{
    unsigned char *dat, *q;
    unsigned l, newoff, i;

    if (p[*off] != '|') {
        cli_errmsg("Data start marker missing: %c\n", p[*off]);
        *ok = 0;
        return NULL;
    }
    (*off)++;
    l = readNumber(p, off, len, ok);
    /* Test the flag itself, not the (never NULL) pointer to it. */
    if (!l || !*ok) {
        *datalen = 0;
        return NULL;
    }
    /* Bounds check phrased so that a huge length cannot wrap the unsigned
     * arithmetic and slip past it. */
    if (*off > len || l > (len - *off) / 2) {
        cli_errmsg("Line ended while reading data\n");
        *ok = 0;
        return NULL;
    }
    newoff = *off + 2 * l;
    dat    = cli_malloc(l);
    if (!dat) {
        cli_errmsg("Cannot allocate memory for data\n");
        *ok = 0;
        return NULL;
    }
    q = dat;
    for (i = *off; i < newoff; i += 2) {
        const unsigned char v0 = p[i];
        const unsigned char v1 = p[i + 1];
        if (UNLIKELY((v0 & 0xf0) != 0x60 || (v1 & 0xf0) != 0x60)) {
            cli_errmsg("Invalid data part: %c%c\n", v0, v1);
            *ok = 0;
            free(dat);
            return NULL;
        }
        *q++ = (v0 & 0xf) | ((v1 & 0xf) << 4);
    }
    *off     = newoff;
    *datalen = l;
    return (char *)dat;
}
576 
/*
 * Read a data blob with readData() and require it to be a NUL-terminated
 * string.
 *
 * Returns whatever readData() returned when *ok is 0, the blob is empty, or
 * its last byte is '\0'.  If the last byte is not '\0' the blob is logged
 * (after forcing termination), freed, *ok is set to 0 and NULL is returned.
 * A returned non-NULL string must be free()d by the caller.
 */
static inline char *readString(const unsigned char *p, unsigned *off, unsigned len, char *ok)
{
    unsigned nbytes = 0;
    char *text      = readData(p, off, len, ok, &nbytes);

    if (!*ok || nbytes == 0 || text[nbytes - 1] == '\0')
        return text;

    text[nbytes - 1] = '\0';
    cli_errmsg("bytecode: string missing \\0 terminator: %s\n", text);
    free(text);
    *ok = 0;
    return NULL;
}
590 
parseHeader(struct cli_bc * bc,unsigned char * buffer,unsigned * linelength)591 static int parseHeader(struct cli_bc *bc, unsigned char *buffer, unsigned *linelength)
592 {
593     uint64_t magic1;
594     unsigned magic2;
595     char ok = 1;
596     unsigned offset, len, flevel;
597     char *pos;
598 
599     if (strncmp((const char *)buffer, BC_HEADER, sizeof(BC_HEADER) - 1)) {
600         cli_errmsg("Missing file magic in bytecode");
601         return CL_EMALFDB;
602     }
603     offset                   = sizeof(BC_HEADER) - 1;
604     len                      = strlen((const char *)buffer);
605     bc->metadata.formatlevel = readNumber(buffer, &offset, len, &ok);
606     if (!ok) {
607         cli_errmsg("Unable to parse (format) functionality level in bytecode header\n");
608         return CL_EMALFDB;
609     }
610     /* we support 2 bytecode formats */
611     if (bc->metadata.formatlevel != BC_FORMAT_096 &&
612         bc->metadata.formatlevel != BC_FORMAT_LEVEL) {
613         cli_dbgmsg("Skipping bytecode with (format) functionality level: %u (current %u)\n",
614                    bc->metadata.formatlevel, BC_FORMAT_LEVEL);
615         return CL_BREAK;
616     }
617     /* Optimistic parsing, check for error only at the end.*/
618     bc->metadata.timestamp     = readNumber(buffer, &offset, len, &ok);
619     bc->metadata.sigmaker      = readString(buffer, &offset, len, &ok);
620     bc->metadata.targetExclude = readNumber(buffer, &offset, len, &ok);
621     bc->kind                   = readNumber(buffer, &offset, len, &ok);
622     bc->metadata.minfunc       = readNumber(buffer, &offset, len, &ok);
623     bc->metadata.maxfunc       = readNumber(buffer, &offset, len, &ok);
624     flevel                     = cl_retflevel();
625     /* in 0.96 these 2 fields are unused / zero, in post 0.96 these mean
626      * min/max flevel.
627      * So 0 for min/max means no min/max
628      * Note that post 0.96 bytecode/bytecode lsig needs format 7, because
629      * 0.96 doesn't check lsig functionality level.
630      */
631     if ((bc->metadata.minfunc && bc->metadata.minfunc > flevel) ||
632         (bc->metadata.maxfunc && bc->metadata.maxfunc < flevel)) {
633         cli_dbgmsg("Skipping bytecode with (engine) functionality level %u-%u (current %u)\n",
634                    bc->metadata.minfunc, bc->metadata.maxfunc, flevel);
635         return CL_BREAK;
636     }
637     bc->metadata.maxresource = readNumber(buffer, &offset, len, &ok);
638     bc->metadata.compiler    = readString(buffer, &offset, len, &ok);
639     bc->num_types            = readNumber(buffer, &offset, len, &ok);
640     bc->num_func             = readNumber(buffer, &offset, len, &ok);
641     bc->state                = bc_loaded;
642     bc->uses_apis            = NULL;
643     bc->dbgnodes             = NULL;
644     bc->dbgnode_cnt          = 0;
645     if (!ok) {
646         cli_errmsg("Invalid bytecode header at %u\n", offset);
647         return CL_EMALFDB;
648     }
649     magic1 = readNumber(buffer, &offset, len, &ok);
650     magic2 = readFixedNumber(buffer, &offset, len, &ok, 2);
651     if (!ok || magic1 != 0x53e5493e9f3d1c30ull || magic2 != 42) {
652         unsigned long m0 = magic1 >> 32;
653         unsigned long m1 = magic1;
654         cli_errmsg("Magic numbers don't match: %lx%lx, %u\n", m0, m1, magic2);
655         return CL_EMALFDB;
656     }
657     if (buffer[offset] != ':') {
658         cli_errmsg("Expected : but found: %c\n", buffer[offset]);
659         return CL_EMALFDB;
660     }
661     offset++;
662     *linelength = strtol((const char *)buffer + offset, &pos, 10);
663     if (*pos != '\0') {
664         cli_errmsg("Invalid number: %s\n", buffer + offset);
665         return CL_EMALFDB;
666     }
667 
668     bc->funcs = cli_calloc(bc->num_func, sizeof(*bc->funcs));
669     if (!bc->funcs) {
670         cli_errmsg("Out of memory allocating %u functions\n", bc->num_func);
671         return CL_EMEM;
672     }
673     bc->types = cli_calloc(bc->num_types, sizeof(*bc->types));
674     if (!bc->types) {
675         cli_errmsg("Out of memory allocating %u types\n", bc->num_types);
676         return CL_EMEM;
677     }
678     return CL_SUCCESS;
679 }
680 
parseLSig(struct cli_bc * bc,char * buffer)681 static int parseLSig(struct cli_bc *bc, char *buffer)
682 {
683     const char *prefix;
684     char *vnames, *vend = strchr(buffer, ';');
685     if (vend) {
686         bc->lsig = cli_strdup(buffer);
687         *vend++  = '\0';
688         prefix   = buffer;
689         vnames   = strchr(vend, '{');
690     } else {
691         /* Not a logical signature, but we still have a virusname */
692         bc->hook_name = cli_strdup(buffer);
693         bc->lsig      = NULL;
694     }
695 
696     return CL_SUCCESS;
697 }
698 
readTypeID(struct cli_bc * bc,unsigned char * buffer,unsigned * offset,unsigned len,char * ok)699 static uint16_t readTypeID(struct cli_bc *bc, unsigned char *buffer,
700                            unsigned *offset, unsigned len, char *ok)
701 {
702     uint64_t t = readNumber(buffer, offset, len, ok);
703     if (!ok)
704         return ~0;
705     if (t >= bc->num_types + bc->start_tid) {
706         cli_errmsg("Invalid type id: %llu\n", (unsigned long long)t);
707         *ok = 0;
708         return ~0;
709     }
710     return t;
711 }
712 
parseType(struct cli_bc * bc,struct cli_bc_type * ty,unsigned char * buffer,unsigned * off,unsigned len,char * ok)713 static void parseType(struct cli_bc *bc, struct cli_bc_type *ty,
714                       unsigned char *buffer, unsigned *off, unsigned len,
715                       char *ok)
716 {
717     unsigned j;
718 
719     ty->numElements = readNumber(buffer, off, len, ok);
720     if (!*ok) {
721         cli_errmsg("Error parsing type\n");
722         *ok = 0;
723         return;
724     }
725     ty->containedTypes = cli_malloc(sizeof(*ty->containedTypes) * ty->numElements);
726     if (!ty->containedTypes) {
727         cli_errmsg("Out of memory allocating %u types\n", ty->numElements);
728         *ok = 0;
729         return;
730     }
731     for (j = 0; j < ty->numElements; j++) {
732         ty->containedTypes[j] = readTypeID(bc, buffer, off, len, ok);
733     }
734 }
735 
736 static uint16_t containedTy[] = {8, 16, 32, 64};
737 
738 #define NUM_STATIC_TYPES 4
add_static_types(struct cli_bc * bc)739 static void add_static_types(struct cli_bc *bc)
740 {
741     unsigned i;
742     for (i = 0; i < NUM_STATIC_TYPES; i++) {
743         bc->types[i].kind           = DPointerType;
744         bc->types[i].numElements    = 1;
745         bc->types[i].containedTypes = &containedTy[i];
746         bc->types[i].size = bc->types[i].align = 8;
747     }
748 }
749 
parseTypes(struct cli_bc * bc,unsigned char * buffer)750 static int parseTypes(struct cli_bc *bc, unsigned char *buffer)
751 {
752     unsigned i, offset = 1, len = strlen((const char *)buffer);
753     char ok = 1;
754 
755     if (buffer[0] != 'T') {
756         cli_errmsg("Invalid function types header: %c\n", buffer[0]);
757         return CL_EMALFDB;
758     }
759     bc->start_tid = readFixedNumber(buffer, &offset, len, &ok, 2);
760     if (bc->start_tid != BC_START_TID) {
761         cli_warnmsg("Type start id mismatch: %u != %u\n", bc->start_tid,
762                     BC_START_TID);
763         return CL_BREAK;
764     }
765     add_static_types(bc);
766     for (i = (BC_START_TID - 65); i < bc->num_types - 1; i++) {
767         struct cli_bc_type *ty = &bc->types[i];
768         uint8_t t              = readFixedNumber(buffer, &offset, len, &ok, 1);
769         if (!ok) {
770             cli_errmsg("Error reading type kind\n");
771             return CL_EMALFDB;
772         }
773         switch (t) {
774             case 1:
775                 ty->kind = DFunctionType;
776                 ty->size = ty->align = sizeof(void *);
777                 parseType(bc, ty, buffer, &offset, len, &ok);
778                 if (!ok) {
779                     cli_errmsg("Error parsing type %u\n", i);
780                     return CL_EMALFDB;
781                 }
782                 if (!ty->numElements) {
783                     cli_errmsg("Function with no return type? %u\n", i);
784                     return CL_EMALFDB;
785                 }
786                 break;
787             case 2:
788             case 3:
789                 ty->kind = (t == 2) ? DPackedStructType : DStructType;
790                 ty->size = ty->align = 0; /* TODO:calculate size/align of structs */
791                 ty->align            = 8;
792                 parseType(bc, ty, buffer, &offset, len, &ok);
793                 if (!ok) {
794                     cli_errmsg("Error parsing type %u\n", i);
795                     return CL_EMALFDB;
796                 }
797                 break;
798             case 4:
799                 ty->kind = DArrayType;
800                 /* number of elements of array, not subtypes! */
801                 ty->numElements = readNumber(buffer, &offset, len, &ok);
802                 if (!ok) {
803                     cli_errmsg("Error parsing type %u\n", i);
804                     return CL_EMALFDB;
805                 }
806                 /* fall-through */
807             case 5:
808                 if (t == 5) {
809                     ty->kind        = DPointerType;
810                     ty->numElements = 1;
811                 }
812                 ty->containedTypes = cli_malloc(sizeof(*ty->containedTypes));
813                 if (!ty->containedTypes) {
814                     cli_errmsg("Out of memory allocating containedType\n");
815                     return CL_EMALFDB;
816                 }
817                 ty->containedTypes[0] = readTypeID(bc, buffer, &offset, len, &ok);
818                 if (!ok) {
819                     cli_errmsg("Error parsing type %u\n", i);
820                     return CL_EMALFDB;
821                 }
822                 if (t == 5) {
823                     /* for interpreter, pointers 64-bit there */
824                     ty->size = ty->align = 8;
825                 } else {
826                     ty->size  = ty->numElements * typesize(bc, ty->containedTypes[0]);
827                     ty->align = typealign(bc, ty->containedTypes[0]);
828                 }
829                 break;
830             default:
831                 cli_errmsg("Invalid type kind: %u\n", t);
832                 return CL_EMALFDB;
833         }
834     }
835     for (i = (BC_START_TID - 65); i < bc->num_types - 1; i++) {
836         struct cli_bc_type *ty = &bc->types[i];
837         if (ty->kind == DArrayType) {
838             ty->size  = ty->numElements * typesize(bc, ty->containedTypes[0]);
839             ty->align = typealign(bc, ty->containedTypes[0]);
840         }
841     }
842     return CL_SUCCESS;
843 }
844 
845 /* checks whether the type described by tid is the same as the one described by
846  * apitid. */
types_equal(const struct cli_bc * bc,uint16_t * apity2ty,uint16_t tid,uint16_t apitid)847 static int types_equal(const struct cli_bc *bc, uint16_t *apity2ty, uint16_t tid, uint16_t apitid)
848 {
849     unsigned i;
850     const struct cli_bc_type *ty    = &bc->types[tid - 65];
851     const struct cli_bc_type *apity = &cli_apicall_types[apitid];
852     /* If we've already verified type equality, return.
853      * Since we need to check equality of recursive types, we assume types are
854      * equal while checking equality of contained types, unless proven
855      * otherwise. */
856     if (apity2ty[apitid] == tid + 1)
857         return 1;
858     apity2ty[apitid] = tid + 1;
859 
860     if (ty->kind != apity->kind) {
861         cli_dbgmsg("bytecode: type kind mismatch: %u != %u\n", ty->kind, apity->kind);
862         return 0;
863     }
864     if (ty->numElements != apity->numElements) {
865         cli_dbgmsg("bytecode: type numElements mismatch: %u != %u\n", ty->numElements, apity->numElements);
866         return 0;
867     }
868     for (i = 0; i < ty->numElements; i++) {
869         if (apity->containedTypes[i] < BC_START_TID) {
870             if (ty->containedTypes[i] != apity->containedTypes[i]) {
871                 cli_dbgmsg("bytecode: contained type mismatch: %u != %u\n",
872                            ty->containedTypes[i], apity->containedTypes[i]);
873                 return 0;
874             }
875         } else if (!types_equal(bc, apity2ty, ty->containedTypes[i], apity->containedTypes[i] - BC_START_TID))
876             return 0;
877         if (ty->kind == DArrayType)
878             break; /* validated the contained type already */
879     }
880     return 1;
881 }
882 
parseApis(struct cli_bc * bc,unsigned char * buffer)883 static int parseApis(struct cli_bc *bc, unsigned char *buffer)
884 {
885     unsigned i, offset = 1, len = strlen((const char *)buffer), maxapi, calls;
886     char ok = 1;
887     uint16_t *apity2ty; /*map of api type to current bytecode type ID */
888 
889     if (buffer[0] != 'E') {
890         cli_errmsg("bytecode: Invalid api header: %c\n", buffer[0]);
891         return CL_EMALFDB;
892     }
893 
894     maxapi = readNumber(buffer, &offset, len, &ok);
895     if (!ok)
896         return CL_EMALFDB;
897     if (maxapi > cli_apicall_maxapi) {
898         cli_dbgmsg("bytecode using API %u, but highest API known to libclamav is %u, skipping\n", maxapi, cli_apicall_maxapi);
899         return CL_BREAK;
900     }
901     calls = readNumber(buffer, &offset, len, &ok);
902     if (!ok)
903         return CL_EMALFDB;
904     if (calls > maxapi) {
905         cli_errmsg("bytecode: attempting to describe more APIs than max: %u > %u\n", calls, maxapi);
906         return CL_EMALFDB;
907     }
908     bc->uses_apis = cli_bitset_init();
909     if (!bc->uses_apis) {
910         cli_errmsg("Out of memory allocating apis bitset\n");
911         return CL_EMEM;
912     }
913     apity2ty = cli_calloc(cli_apicall_maxtypes, sizeof(*cli_apicall_types));
914     if (!apity2ty) {
915         cli_errmsg("Out of memory allocating apity2ty\n");
916         return CL_EMEM;
917     }
918     for (i = 0; i < calls; i++) {
919         unsigned id  = readNumber(buffer, &offset, len, &ok);
920         uint16_t tid = readTypeID(bc, buffer, &offset, len, &ok);
921         char *name   = readString(buffer, &offset, len, &ok);
922 
923         /* validate APIcall prototype */
924         if (id > maxapi) {
925             cli_errmsg("bytecode: API id %u out of range, max %u\n", id, maxapi);
926             ok = 0;
927         }
928         /* API ids start from 1 */
929         id--;
930         if (ok && name && strcmp(cli_apicalls[id].name, name)) {
931             cli_errmsg("bytecode: API %u name mismatch: %s expected %s\n", id, name, cli_apicalls[id].name);
932             ok = 0;
933         }
934         if (ok && !types_equal(bc, apity2ty, tid, cli_apicalls[id].type)) {
935             cli_errmsg("bytecode: API %u prototype doesn't match\n", id);
936             ok = 0;
937         }
938         /* don't need the name anymore */
939         free(name);
940         if (!ok) {
941             free(apity2ty); /* free temporary map */
942             return CL_EMALFDB;
943         }
944 
945         /* APIcall is valid */
946         cli_bitset_set(bc->uses_apis, id);
947     }
948     free(apity2ty); /* free temporary map */
949     cli_dbgmsg("bytecode: Parsed %u APIcalls, maxapi %u\n", calls, maxapi);
950     return CL_SUCCESS;
951 }
952 
type_components(struct cli_bc * bc,uint16_t id,char * ok)953 static uint16_t type_components(struct cli_bc *bc, uint16_t id, char *ok)
954 {
955     unsigned i, sum = 0;
956     const struct cli_bc_type *ty;
957     if (id <= 64)
958         return 1;
959     ty = &bc->types[id - 65];
960     /* TODO: protect against recursive types */
961     switch (ty->kind) {
962         case DFunctionType:
963             cli_errmsg("bytecode: function type not accepted for constant: %u\n", id);
964             /* don't accept functions as constant initializers */
965             *ok = 0;
966             return 0;
967         case DPointerType:
968             return 2;
969         case DStructType:
970         case DPackedStructType:
971             for (i = 0; i < ty->numElements; i++) {
972                 sum += type_components(bc, ty->containedTypes[i], ok);
973             }
974             return sum;
975         case DArrayType:
976             return type_components(bc, ty->containedTypes[0], ok) * ty->numElements;
977         default:
978             *ok = 0;
979             return 0;
980     }
981 }
982 
readConstant(struct cli_bc * bc,unsigned i,unsigned comp,unsigned char * buffer,unsigned * offset,unsigned len,char * ok)983 static void readConstant(struct cli_bc *bc, unsigned i, unsigned comp,
984                          unsigned char *buffer, unsigned *offset,
985                          unsigned len, char *ok)
986 {
987     unsigned j = 0;
988     if (*ok && buffer[*offset] == 0x40 &&
989         buffer[*offset + 1] == 0x60) {
990         /* zero initializer */
991         memset(bc->globals[i], 0, sizeof(*bc->globals[0]) * comp);
992         (*offset) += 2;
993         return;
994     }
995     while (*ok && buffer[*offset] != 0x60) {
996         if (j >= comp) {
997             cli_errmsg("bytecode: constant has too many subcomponents, expected %u\n", comp);
998             *ok = 0;
999             return;
1000         }
1001         buffer[*offset] |= 0x20;
1002         bc->globals[i][j++] = readNumber(buffer, offset, len, ok);
1003     }
1004     if (*ok && j != comp) {
1005         cli_errmsg("bytecode: constant has too few subcomponents: %u < %u\n", j, comp);
1006         *ok = 0;
1007     }
1008     (*offset)++;
1009 }
1010 
1011 /* parse constant globals with constant initializers */
parseGlobals(struct cli_bc * bc,unsigned char * buffer)1012 static int parseGlobals(struct cli_bc *bc, unsigned char *buffer)
1013 {
1014     unsigned i, offset = 1, len = strlen((const char *)buffer), numglobals;
1015     unsigned maxglobal;
1016     char ok = 1;
1017 
1018     if (buffer[0] != 'G') {
1019         cli_errmsg("bytecode: Invalid globals header: %c\n", buffer[0]);
1020         return CL_EMALFDB;
1021     }
1022     maxglobal = readNumber(buffer, &offset, len, &ok);
1023     if (maxglobal > cli_apicall_maxglobal) {
1024         cli_dbgmsg("bytecode using global %u, but highest global known to libclamav is %u, skipping\n", maxglobal, cli_apicall_maxglobal);
1025         return CL_BREAK;
1026     }
1027     numglobals  = readNumber(buffer, &offset, len, &ok);
1028     bc->globals = cli_calloc(numglobals, sizeof(*bc->globals));
1029     if (!bc->globals) {
1030         cli_errmsg("bytecode: OOM allocating memory for %u globals\n", numglobals);
1031         return CL_EMEM;
1032     }
1033     bc->globaltys = cli_calloc(numglobals, sizeof(*bc->globaltys));
1034     if (!bc->globaltys) {
1035         cli_errmsg("bytecode: OOM allocating memory for %u global types\n", numglobals);
1036         return CL_EMEM;
1037     }
1038     bc->num_globals = numglobals;
1039     if (!ok)
1040         return CL_EMALFDB;
1041     for (i = 0; i < numglobals; i++) {
1042         unsigned comp;
1043         bc->globaltys[i] = readTypeID(bc, buffer, &offset, len, &ok);
1044         comp             = type_components(bc, bc->globaltys[i], &ok);
1045         if (!ok)
1046             return CL_EMALFDB;
1047         bc->globals[i] = cli_malloc(sizeof(*bc->globals[0]) * comp);
1048         if (!bc->globals[i])
1049             return CL_EMEM;
1050         readConstant(bc, i, comp, buffer, &offset, len, &ok);
1051     }
1052     if (!ok)
1053         return CL_EMALFDB;
1054     if (offset != len) {
1055         cli_errmsg("Trailing garbage in globals: %d extra bytes\n",
1056                    len - offset);
1057         return CL_EMALFDB;
1058     }
1059     return CL_SUCCESS;
1060 }
1061 
parseMD(struct cli_bc * bc,unsigned char * buffer)1062 static int parseMD(struct cli_bc *bc, unsigned char *buffer)
1063 {
1064     unsigned offset = 1, len = strlen((const char *)buffer);
1065     unsigned numMD, i, b;
1066     char ok = 1;
1067     if (buffer[0] != 'D')
1068         return CL_EMALFDB;
1069     numMD = readNumber(buffer, &offset, len, &ok);
1070     if (!ok) {
1071         cli_errmsg("Unable to parse number of MD nodes\n");
1072         return CL_EMALFDB;
1073     }
1074     b = bc->dbgnode_cnt;
1075     bc->dbgnode_cnt += numMD;
1076     bc->dbgnodes = cli_realloc(bc->dbgnodes, bc->dbgnode_cnt * sizeof(*bc->dbgnodes));
1077     if (!bc->dbgnodes)
1078         return CL_EMEM;
1079     for (i = 0; i < numMD; i++) {
1080         unsigned j;
1081         struct cli_bc_dbgnode_element *elts;
1082         unsigned el = readNumber(buffer, &offset, len, &ok);
1083         if (!ok) {
1084             cli_errmsg("Unable to parse number of elements\n");
1085             return CL_EMALFDB;
1086         }
1087         bc->dbgnodes[b + i].numelements = el;
1088         bc->dbgnodes[b + i].elements = elts = cli_calloc(el, sizeof(*elts));
1089         if (!elts)
1090             return CL_EMEM;
1091         for (j = 0; j < el; j++) {
1092             if (buffer[offset] == '|') {
1093                 elts[j].string = readData(buffer, &offset, len, &ok, &elts[j].len);
1094                 if (!ok)
1095                     return CL_EMALFDB;
1096             } else {
1097                 elts[j].len = readNumber(buffer, &offset, len, &ok);
1098                 if (!ok)
1099                     return CL_EMALFDB;
1100                 if (elts[j].len) {
1101                     elts[j].constant = readNumber(buffer, &offset, len, &ok);
1102                 } else
1103                     elts[j].nodeid = readNumber(buffer, &offset, len, &ok);
1104                 if (!ok)
1105                     return CL_EMALFDB;
1106             }
1107         }
1108     }
1109     cli_dbgmsg("bytecode: Parsed %u nodes total\n", bc->dbgnode_cnt);
1110     return CL_SUCCESS;
1111 }
1112 
parseFunctionHeader(struct cli_bc * bc,unsigned fn,unsigned char * buffer)1113 static int parseFunctionHeader(struct cli_bc *bc, unsigned fn, unsigned char *buffer)
1114 {
1115     char ok = 1;
1116     unsigned offset, len, all_locals = 0, i;
1117     struct cli_bc_func *func;
1118 
1119     if (fn >= bc->num_func) {
1120         cli_errmsg("Found more functions than declared: %u >= %u\n", fn,
1121                    bc->num_func);
1122         return CL_EMALFDB;
1123     }
1124     func = &bc->funcs[fn];
1125     len  = strlen((const char *)buffer);
1126 
1127     if (buffer[0] != 'A') {
1128         cli_errmsg("Invalid function arguments header: %c\n", buffer[0]);
1129         return CL_EMALFDB;
1130     }
1131     offset           = 1;
1132     func->numArgs    = readFixedNumber(buffer, &offset, len, &ok, 1);
1133     func->returnType = readTypeID(bc, buffer, &offset, len, &ok);
1134     if (buffer[offset] != 'L') {
1135         cli_errmsg("Invalid function locals header: %c\n", buffer[offset]);
1136         return CL_EMALFDB;
1137     }
1138     offset++;
1139     func->numLocals = readNumber(buffer, &offset, len, &ok);
1140     if (!ok) {
1141         cli_errmsg("Invalid number of arguments/locals\n");
1142         return CL_EMALFDB;
1143     }
1144     all_locals = func->numArgs + func->numLocals;
1145     if (!all_locals) {
1146         func->types = NULL;
1147     } else {
1148         func->types = cli_calloc(all_locals, sizeof(*func->types));
1149         if (!func->types) {
1150             cli_errmsg("Out of memory allocating function arguments\n");
1151             return CL_EMEM;
1152         }
1153     }
1154     for (i = 0; i < all_locals; i++) {
1155         func->types[i] = readNumber(buffer, &offset, len, &ok);
1156         if (readFixedNumber(buffer, &offset, len, &ok, 1))
1157             func->types[i] |= 0x8000;
1158     }
1159     if (!ok) {
1160         cli_errmsg("Invalid local types\n");
1161         return CL_EMALFDB;
1162     }
1163     if (buffer[offset] != 'F') {
1164         cli_errmsg("Invalid function body header: %c\n", buffer[offset]);
1165         return CL_EMALFDB;
1166     }
1167     offset++;
1168     func->numInsts = readNumber(buffer, &offset, len, &ok);
1169     if (!ok) {
1170         cli_errmsg("Invalid instructions count\n");
1171         return CL_EMALFDB;
1172     }
1173     func->numValues    = func->numArgs + func->numLocals;
1174     func->insn_idx     = 0;
1175     func->numConstants = 0;
1176     func->allinsts     = cli_calloc(func->numInsts, sizeof(*func->allinsts));
1177     if (!func->allinsts) {
1178         cli_errmsg("Out of memory allocating instructions\n");
1179         return CL_EMEM;
1180     }
1181     func->numBB = readNumber(buffer, &offset, len, &ok);
1182     if (!ok) {
1183         cli_errmsg("Invalid basic block count\n");
1184         return CL_EMALFDB;
1185     }
1186     func->BB = cli_calloc(func->numBB, sizeof(*func->BB));
1187     if (!func->BB) {
1188         cli_errmsg("Out of memory allocating basic blocks\n");
1189         return CL_EMEM;
1190     }
1191     return CL_SUCCESS;
1192 }
1193 
readBBID(struct cli_bc_func * func,const unsigned char * buffer,unsigned * off,unsigned len,char * ok)1194 static bbid_t readBBID(struct cli_bc_func *func, const unsigned char *buffer, unsigned *off, unsigned len, char *ok)
1195 {
1196     unsigned id = readNumber(buffer, off, len, ok);
1197     if (!id || id >= func->numBB) {
1198         cli_errmsg("Basic block ID out of range: %u\n", id);
1199         *ok = 0;
1200     }
1201     if (!*ok)
1202         return ~0;
1203     return id;
1204 }
1205 
1206 /*
1207 static uint16_t get_type(struct cli_bc_func *func, operand_t op)
1208 {
1209     if (op >= func->numValues)
1210 	return 64;
1211     return func->types[op];
1212 }*/
/**
 * Returns the declared type of operand `op` with bit 0x8000 masked off, or 0
 * when `op` is not one of the function's arguments or locals.
 */
static int16_t get_optype(const struct cli_bc_func *bcfunc, operand_t op)
{
    return (op < bcfunc->numArgs + bcfunc->numLocals) ? (bcfunc->types[op] & 0x7fff) : 0;
}
1219 
parseBB(struct cli_bc * bc,unsigned func,unsigned bb,unsigned char * buffer)1220 static int parseBB(struct cli_bc *bc, unsigned func, unsigned bb, unsigned char *buffer)
1221 {
1222     char ok = 1;
1223     unsigned offset, len, i, last = 0;
1224     struct cli_bc_bb *BB;
1225     struct cli_bc_func *bcfunc = &bc->funcs[func];
1226     struct cli_bc_inst inst;
1227 
1228     if (bb >= bcfunc->numBB) {
1229         cli_errmsg("Found too many basic blocks\n");
1230         return CL_EMALFDB;
1231     }
1232 
1233     BB  = &bcfunc->BB[bb];
1234     len = strlen((const char *)buffer);
1235     if (buffer[0] != 'B') {
1236         cli_errmsg("Invalid basic block header: %c\n", buffer[0]);
1237         return CL_EMALFDB;
1238     }
1239     offset       = 1;
1240     BB->numInsts = 0;
1241     BB->insts    = &bcfunc->allinsts[bcfunc->insn_idx];
1242     while (!last) {
1243         unsigned numOp;
1244         if (buffer[offset] == 'T') {
1245             last = 1;
1246             offset++;
1247             /* terminators are void */
1248             inst.type = 0;
1249             inst.dest = 0;
1250         } else {
1251             inst.type = readNumber(buffer, &offset, len, &ok);
1252             inst.dest = readNumber(buffer, &offset, len, &ok);
1253         }
1254         inst.opcode = readFixedNumber(buffer, &offset, len, &ok, 2);
1255         if (!ok) {
1256             cli_errmsg("Invalid type or operand\n");
1257             return CL_EMALFDB;
1258         }
1259         if (inst.opcode >= OP_BC_INVALID) {
1260             cli_errmsg("Invalid opcode: %u\n", inst.opcode);
1261             return CL_EMALFDB;
1262         }
1263 
1264         switch (inst.opcode) {
1265             case OP_BC_JMP:
1266                 inst.u.jump = readBBID(bcfunc, buffer, &offset, len, &ok);
1267                 break;
1268             case OP_BC_RET:
1269                 inst.type      = readNumber(buffer, &offset, len, &ok);
1270                 inst.u.unaryop = readOperand(bcfunc, buffer, &offset, len, &ok);
1271                 break;
1272             case OP_BC_BRANCH:
1273                 inst.u.branch.condition = readOperand(bcfunc, buffer, &offset, len, &ok);
1274                 inst.u.branch.br_true   = readBBID(bcfunc, buffer, &offset, len, &ok);
1275                 inst.u.branch.br_false  = readBBID(bcfunc, buffer, &offset, len, &ok);
1276                 break;
1277             case OP_BC_CALL_API: /* fall-through */
1278             case OP_BC_CALL_DIRECT:
1279                 numOp = readFixedNumber(buffer, &offset, len, &ok, 1);
1280                 if (ok) {
1281                     inst.u.ops.numOps  = numOp;
1282                     inst.u.ops.opsizes = NULL;
1283                     if (!numOp) {
1284                         inst.u.ops.ops = NULL;
1285                     } else {
1286                         inst.u.ops.ops = cli_calloc(numOp, sizeof(*inst.u.ops.ops));
1287                         if (!inst.u.ops.ops) {
1288                             cli_errmsg("Out of memory allocating operands\n");
1289                             return CL_EMEM;
1290                         }
1291                     }
1292                     if (inst.opcode == OP_BC_CALL_DIRECT)
1293                         inst.u.ops.funcid = readFuncID(bc, buffer, &offset, len, &ok);
1294                     else
1295                         inst.u.ops.funcid = readAPIFuncID(bc, buffer, &offset, len, &ok);
1296                     for (i = 0; i < numOp; i++) {
1297                         inst.u.ops.ops[i] = readOperand(bcfunc, buffer, &offset, len, &ok);
1298                     }
1299                 }
1300                 break;
1301             case OP_BC_ZEXT:
1302             case OP_BC_SEXT:
1303             case OP_BC_TRUNC:
1304                 inst.u.cast.source = readOperand(bcfunc, buffer, &offset, len, &ok);
1305                 inst.u.cast.mask   = bcfunc->types[inst.u.cast.source];
1306                 if (inst.u.cast.mask == 1)
1307                     inst.u.cast.size = 0;
1308                 else if (inst.u.cast.mask <= 8)
1309                     inst.u.cast.size = 1;
1310                 else if (inst.u.cast.mask <= 16)
1311                     inst.u.cast.size = 2;
1312                 else if (inst.u.cast.mask <= 32)
1313                     inst.u.cast.size = 3;
1314                 else if (inst.u.cast.mask <= 64)
1315                     inst.u.cast.size = 4;
1316                 /* calculate mask */
1317                 if (inst.opcode != OP_BC_SEXT)
1318                     inst.u.cast.mask = inst.u.cast.mask != 64 ? (1ull << inst.u.cast.mask) - 1 : ~0ull;
1319                 break;
1320             case OP_BC_GEP1:
1321             case OP_BC_GEPZ:
1322                 inst.u.three[0] = readNumber(buffer, &offset, len, &ok);
1323                 inst.u.three[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1324                 inst.u.three[2] = readOperand(bcfunc, buffer, &offset, len, &ok);
1325                 break;
1326             case OP_BC_GEPN:
1327                 numOp = readFixedNumber(buffer, &offset, len, &ok, 1);
1328                 if (ok) {
1329                     inst.u.ops.numOps  = numOp + 2;
1330                     inst.u.ops.opsizes = NULL;
1331                     inst.u.ops.ops     = cli_calloc(numOp + 2, sizeof(*inst.u.ops.ops));
1332                     if (!inst.u.ops.ops) {
1333                         cli_errmsg("Out of memory allocating operands\n");
1334                         return CL_EMEM;
1335                     }
1336                     inst.u.ops.ops[0] = readNumber(buffer, &offset, len, &ok);
1337                     for (i = 1; i < numOp + 2; i++)
1338                         inst.u.ops.ops[i] = readOperand(bcfunc, buffer, &offset, len, &ok);
1339                 }
1340                 break;
1341             case OP_BC_ICMP_EQ:
1342             case OP_BC_ICMP_NE:
1343             case OP_BC_ICMP_UGT:
1344             case OP_BC_ICMP_UGE:
1345             case OP_BC_ICMP_ULT:
1346             case OP_BC_ICMP_ULE:
1347             case OP_BC_ICMP_SGT:
1348             case OP_BC_ICMP_SGE:
1349             case OP_BC_ICMP_SLE:
1350             case OP_BC_ICMP_SLT:
1351                 /* instruction type must be correct before readOperand! */
1352                 inst.type = readNumber(buffer, &offset, len, &ok);
1353                 /* fall-through */
1354             default:
1355                 numOp = operand_counts[inst.opcode];
1356                 switch (numOp) {
1357                     case 0:
1358                         break;
1359                     case 1:
1360                         inst.u.unaryop = readOperand(bcfunc, buffer, &offset, len, &ok);
1361                         break;
1362                     case 2:
1363                         inst.u.binop[0] = readOperand(bcfunc, buffer, &offset, len, &ok);
1364                         inst.u.binop[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1365                         break;
1366                     case 3:
1367                         inst.u.three[0] = readOperand(bcfunc, buffer, &offset, len, &ok);
1368                         inst.u.three[1] = readOperand(bcfunc, buffer, &offset, len, &ok);
1369                         inst.u.three[2] = readOperand(bcfunc, buffer, &offset, len, &ok);
1370                         break;
1371                     default:
1372                         cli_errmsg("Opcode %u with too many operands: %u?\n", inst.opcode, numOp);
1373                         ok = 0;
1374                         break;
1375                 }
1376         }
1377         if (inst.opcode == OP_BC_STORE) {
1378             int16_t t = get_optype(bcfunc, inst.u.binop[0]);
1379             if (t)
1380                 inst.type = t;
1381         }
1382         if (inst.opcode == OP_BC_COPY)
1383             inst.type = get_optype(bcfunc, inst.u.binop[1]);
1384         if (!ok) {
1385             cli_errmsg("Invalid instructions or operands\n");
1386             return CL_EMALFDB;
1387         }
1388         if (bcfunc->insn_idx + BB->numInsts >= bcfunc->numInsts) {
1389             cli_errmsg("More instructions than declared in total: %u > %u!\n",
1390                        bcfunc->insn_idx + BB->numInsts, bcfunc->numInsts);
1391             return CL_EMALFDB;
1392         }
1393         inst.interp_op = inst.opcode * 5;
1394         if (inst.type > 1) {
1395             if (inst.type <= 8)
1396                 inst.interp_op += 1;
1397             else if (inst.type <= 16)
1398                 inst.interp_op += 2;
1399             else if (inst.type <= 32)
1400                 inst.interp_op += 3;
1401             else if (inst.type <= 65)
1402                 inst.interp_op += 4;
1403             else {
1404                 cli_dbgmsg("unknown inst type: %d\n", inst.type);
1405             }
1406         }
1407         BB->insts[BB->numInsts++] = inst;
1408     }
1409     if (bb + 1 == bc->funcs[func].numBB) {
1410         if (buffer[offset] != 'E') {
1411             cli_errmsg("Missing basicblock terminator, got: %c\n", buffer[offset]);
1412             return CL_EMALFDB;
1413         }
1414         offset++;
1415     }
1416     if (buffer[offset] == 'D') {
1417         uint32_t num;
1418         offset += 3;
1419         if (offset >= len)
1420             return CL_EMALFDB;
1421         num = (uint32_t)readNumber(buffer, &offset, len, &ok);
1422         if (!ok)
1423             return CL_EMALFDB;
1424         if (num != bcfunc->numInsts) {
1425             cli_errmsg("invalid number of dbg nodes, expected: %u, got: %u\n", bcfunc->numInsts, num);
1426             return CL_EMALFDB;
1427         }
1428         bcfunc->dbgnodes = cli_malloc(num * sizeof(*bcfunc->dbgnodes));
1429         if (!bcfunc->dbgnodes) {
1430             cli_errmsg("Unable to allocate memory for dbg nodes: %u\n", num * (uint32_t)sizeof(*bcfunc->dbgnodes));
1431             return CL_EMEM;
1432         }
1433         for (i = 0; (uint32_t)i < num; i++) {
1434             bcfunc->dbgnodes[i] = readNumber(buffer, &offset, len, &ok);
1435             if (!ok)
1436                 return CL_EMALFDB;
1437         }
1438     }
1439     if (offset != len) {
1440         cli_errmsg("Trailing garbage in basicblock: %d extra bytes\n",
1441                    len - offset);
1442         return CL_EMALFDB;
1443     }
1444     bcfunc->numBytes = 0;
1445     bcfunc->insn_idx += BB->numInsts;
1446     return CL_SUCCESS;
1447 }
1448 
/* State of the line-by-line bytecode loader: names the kind of record the
 * next line is parsed as. */
enum parse_state {
    PARSE_BC_TYPES      = 0, /* type table line */
    PARSE_BC_APIS       = 1, /* API description line */
    PARSE_BC_GLOBALS    = 2, /* globals line */
    PARSE_BC_LSIG       = 3, /* logical signature line */
    PARSE_MD_OPT_HEADER = 4,
    PARSE_FUNC_HEADER   = 5,
    PARSE_BB            = 6,
    PARSE_SKIP          = 7 /* bytecode is being skipped */
};
1459 
/* One row of the bytecode performance report. */
struct sigperf_elem {
    const char *bc_name;       /* name shown in the report */
    uint64_t usecs;            /* accumulated time value of the time event */
    unsigned long run_count;   /* trigger count of the time event */
    unsigned long match_count; /* trigger count of the match event */
};

/**
 * qsort comparator ordering report rows by average time per run
 * (usecs / run_count, integer division), highest average first.
 *
 * The averages are compared explicitly: returning their 64-bit difference
 * as an int would truncate and could yield zero or the wrong sign.  A row
 * with a zero run_count is treated as having an average of 0.
 *
 * @return negative if a sorts before b, positive if after, 0 if equal.
 */
static int sigelem_comp(const void *a, const void *b)
{
    const struct sigperf_elem *ela = a;
    const struct sigperf_elem *elb = b;
    const uint64_t avg_a = ela->run_count ? ela->usecs / ela->run_count : 0;
    const uint64_t avg_b = elb->run_count ? elb->usecs / elb->run_count : 0;

    /* descending order: the larger average comes first */
    return (avg_b > avg_a) - (avg_b < avg_a);
}
1473 
cli_sigperf_print()1474 void cli_sigperf_print()
1475 {
1476     struct sigperf_elem stats[MAX_TRACKED_BC], *elem = stats;
1477     int i, elems = 0, max_name_len = 0, name_len;
1478 
1479     if (!g_sigid || !g_sigevents) {
1480         cli_warnmsg("cli_sigperf_print: statistics requested but no bytecodes were loaded!\n");
1481         return;
1482     }
1483 
1484     memset(stats, 0, sizeof(stats));
1485     for (i = 0; i < MAX_TRACKED_BC; i++) {
1486         union ev_val val;
1487         uint32_t count;
1488         const char *name = cli_event_get_name(g_sigevents, i * BC_EVENTS_PER_SIG);
1489         cli_event_get(g_sigevents, i * BC_EVENTS_PER_SIG, &val, &count);
1490         if (!count) {
1491             if (name)
1492                 cli_dbgmsg("No event triggered for %s\n", name);
1493             continue;
1494         }
1495         if (name)
1496             name_len = (int)strlen(name);
1497         else
1498             name_len = 0;
1499         if (name_len > max_name_len)
1500             max_name_len = name_len;
1501         elem->bc_name   = name ? name : "\"noname\"";
1502         elem->usecs     = val.v_int;
1503         elem->run_count = count;
1504         cli_event_get(g_sigevents, i * BC_EVENTS_PER_SIG + 1, &val, &count);
1505         elem->match_count = count;
1506         elem++;
1507         elems++;
1508     }
1509     if (max_name_len < (int)strlen("Bytecode name"))
1510         max_name_len = (int)strlen("Bytecode name");
1511 
1512     cli_qsort(stats, elems, sizeof(struct sigperf_elem), sigelem_comp);
1513 
1514     elem = stats;
1515     /* name runs matches microsecs avg */
1516     cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "Bytecode name",
1517                 8, "#runs", 8, "#matches", 12, "usecs total", 9, "usecs avg");
1518     cli_infomsg(NULL, "%-*s %*s %*s %*s %*s\n", max_name_len, "=============",
1519                 8, "=====", 8, "========", 12, "===========", 9, "=========");
1520     while (elem->run_count) {
1521         cli_infomsg(NULL, "%-*s %*lu %*lu %*" PRIu64 " %*.2f\n", max_name_len, elem->bc_name,
1522                     8, elem->run_count, 8, elem->match_count,
1523                     12, elem->usecs, 9, (double)elem->usecs / elem->run_count);
1524         elem++;
1525     }
1526 }
1527 
sigperf_events_init(struct cli_bc * bc)1528 static void sigperf_events_init(struct cli_bc *bc)
1529 {
1530     int ret;
1531     char *bc_name;
1532 
1533     if (!g_sigevents)
1534         g_sigevents = cli_events_new(MAX_BC_SIGEVENT_ID);
1535 
1536     if (!g_sigevents) {
1537         cli_errmsg("No memory for events table\n");
1538         return;
1539     }
1540 
1541     if (g_sigid > MAX_BC_SIGEVENT_ID - BC_EVENTS_PER_SIG - 1) {
1542         cli_errmsg("sigperf_events_init: events table full. Increase MAX_TRACKED_BC\n");
1543         return;
1544     }
1545 
1546     if (!(bc_name = bc->lsig)) {
1547         if (!(bc_name = bc->hook_name)) {
1548             cli_dbgmsg("cli_event_define error for time event id %d\n", bc->sigtime_id);
1549             return;
1550         }
1551     }
1552 
1553     cli_dbgmsg("sigperf_events_init(): adding sig ids starting %u for %s\n", g_sigid, bc_name);
1554 
1555     /* register time event */
1556     bc->sigtime_id = g_sigid;
1557     ret            = cli_event_define(g_sigevents, g_sigid++, bc_name, ev_time, multiple_sum);
1558     if (ret) {
1559         cli_errmsg("sigperf_events_init: cli_event_define() error for time event id %d\n", bc->sigtime_id);
1560         bc->sigtime_id = MAX_BC_SIGEVENT_ID + 1;
1561         return;
1562     }
1563 
1564     /* register match count */
1565     bc->sigmatch_id = g_sigid;
1566     ret             = cli_event_define(g_sigevents, g_sigid++, bc_name, ev_int, multiple_sum);
1567     if (ret) {
1568         cli_errmsg("sigperf_events_init: cli_event_define() error for matches event id %d\n", bc->sigmatch_id);
1569         bc->sigmatch_id = MAX_BC_SIGEVENT_ID + 1;
1570         return;
1571     }
1572 }
1573 
cli_sigperf_events_destroy()1574 void cli_sigperf_events_destroy()
1575 {
1576     cli_events_free(g_sigevents);
1577 }
1578 
cli_bytecode_load(struct cli_bc * bc,FILE * f,struct cli_dbio * dbio,int trust,int sigperf)1579 int cli_bytecode_load(struct cli_bc *bc, FILE *f, struct cli_dbio *dbio, int trust, int sigperf)
1580 {
1581     unsigned row = 0, current_func = 0, bb = 0;
1582     char *buffer;
1583     unsigned linelength = 0;
1584     char firstbuf[FILEBUFF];
1585     enum parse_state state;
1586     int rc, end = 0;
1587 
1588     memset(bc, 0, sizeof(*bc));
1589     cli_dbgmsg("Loading %s bytecode\n", trust ? "trusted" : "untrusted");
1590     bc->trusted = trust;
1591     if (!f && !dbio) {
1592         cli_errmsg("Unable to load bytecode (null file)\n");
1593         return CL_ENULLARG;
1594     }
1595     if (!cli_dbgets(firstbuf, FILEBUFF, f, dbio)) {
1596         cli_errmsg("Unable to load bytecode (empty file)\n");
1597         return CL_EMALFDB;
1598     }
1599     cli_chomp(firstbuf);
1600     rc    = parseHeader(bc, (unsigned char *)firstbuf, &linelength);
1601     state = PARSE_BC_LSIG;
1602     if (rc == CL_BREAK) {
1603         const char *len = strchr(firstbuf, ':');
1604         bc->state       = bc_skip;
1605         if (!linelength) {
1606             linelength = len ? atoi(len + 1) : 4096;
1607         }
1608         if (linelength < 4096)
1609             linelength = 4096;
1610         cli_dbgmsg("line: %d\n", linelength);
1611         state = PARSE_SKIP;
1612         rc    = CL_SUCCESS;
1613     }
1614     if (rc != CL_SUCCESS) {
1615         cli_errmsg("Error at bytecode line %u\n", row);
1616         return rc;
1617     }
1618     buffer = cli_malloc(linelength);
1619     if (!buffer) {
1620         cli_errmsg("Out of memory allocating line of length %u\n", linelength);
1621         return CL_EMEM;
1622     }
1623     while (cli_dbgets(buffer, linelength, f, dbio) && !end) {
1624         cli_chomp(buffer);
1625         row++;
1626         switch (state) {
1627             case PARSE_BC_LSIG:
1628                 rc = parseLSig(bc, buffer);
1629 #if 0
1630 DEAD CODE
1631 		if (rc == CL_BREAK) /* skip */ { //FIXME: parseLSig always returns CL_SUCCESS
1632 		    bc->state = bc_skip;
1633 		    state = PARSE_SKIP;
1634 		    continue;
1635 		}
1636 		if (rc != CL_SUCCESS) { //FIXME: parseLSig always returns CL_SUCCESS
1637 		    cli_errmsg("Error at bytecode line %u\n", row);
1638 		    free(buffer);
1639 		    return rc;
1640 		}
1641 #endif
1642                 state = PARSE_BC_TYPES;
1643                 break;
1644             case PARSE_BC_TYPES:
1645                 rc = parseTypes(bc, (unsigned char *)buffer);
1646                 if (rc != CL_SUCCESS) {
1647                     cli_errmsg("Error at bytecode line %u\n", row);
1648                     free(buffer);
1649                     return rc;
1650                 }
1651                 state = PARSE_BC_APIS;
1652                 break;
1653             case PARSE_BC_APIS:
1654                 rc = parseApis(bc, (unsigned char *)buffer);
1655                 if (rc == CL_BREAK) /* skip */ {
1656                     bc->state = bc_skip;
1657                     state     = PARSE_SKIP;
1658                     continue;
1659                 }
1660                 if (rc != CL_SUCCESS) {
1661                     cli_errmsg("Error at bytecode line %u\n", row);
1662                     free(buffer);
1663                     return rc;
1664                 }
1665                 state = PARSE_BC_GLOBALS;
1666                 break;
1667             case PARSE_BC_GLOBALS:
1668                 rc = parseGlobals(bc, (unsigned char *)buffer);
1669                 if (rc == CL_BREAK) /* skip */ {
1670                     bc->state = bc_skip;
1671                     state     = PARSE_SKIP;
1672                     continue;
1673                 }
1674                 if (rc != CL_SUCCESS) {
1675                     cli_errmsg("Error at bytecode line %u\n", row);
1676                     free(buffer);
1677                     return rc;
1678                 }
1679                 state = PARSE_MD_OPT_HEADER;
1680                 break;
1681             case PARSE_MD_OPT_HEADER:
1682                 if (buffer[0] == 'D') {
1683                     rc = parseMD(bc, (unsigned char *)buffer);
1684                     if (rc != CL_SUCCESS) {
1685                         cli_errmsg("Error at bytecode line %u\n", row);
1686                         free(buffer);
1687                         return rc;
1688                     }
1689                     break;
1690                 }
1691                 /* fall-through */
1692             case PARSE_FUNC_HEADER:
1693                 if (*buffer == 'S') {
1694                     end = 1;
1695                     break;
1696                 }
1697                 rc = parseFunctionHeader(bc, current_func, (unsigned char *)buffer);
1698                 if (rc != CL_SUCCESS) {
1699                     cli_errmsg("Error at bytecode line %u\n", row);
1700                     free(buffer);
1701                     return rc;
1702                 }
1703                 bb    = 0;
1704                 state = PARSE_BB;
1705                 break;
1706             case PARSE_BB:
1707                 rc = parseBB(bc, current_func, bb++, (unsigned char *)buffer);
1708                 if (rc != CL_SUCCESS) {
1709                     cli_errmsg("Error at bytecode line %u\n", row);
1710                     free(buffer);
1711                     return rc;
1712                 }
1713                 if (bb >= bc->funcs[current_func].numBB) {
1714                     if (bc->funcs[current_func].insn_idx != bc->funcs[current_func].numInsts) {
1715                         cli_errmsg("Parsed different number of instructions than declared: %u != %u\n",
1716                                    bc->funcs[current_func].insn_idx, bc->funcs[current_func].numInsts);
1717                         free(buffer);
1718                         return CL_EMALFDB;
1719                     }
1720                     cli_dbgmsg("Parsed %u BBs, %u instructions\n",
1721                                bb, bc->funcs[current_func].numInsts);
1722                     state = PARSE_FUNC_HEADER;
1723                     current_func++;
1724                 }
1725                 break;
1726             case PARSE_SKIP:
1727                 /* stop at S (source code), readdb.c knows how to skip this one
1728 		 * */
1729                 if (buffer[0] == 'S')
1730                     end = 1;
1731                 /* noop parse, but we need to use dbgets with dynamic buffer,
1732 		 * otherwise we get 'Line too long for provided buffer' */
1733                 break;
1734         }
1735     }
1736     free(buffer);
1737     cli_dbgmsg("Parsed %d functions\n", current_func);
1738     if (sigperf)
1739         sigperf_events_init(bc);
1740     if (current_func != bc->num_func && bc->state != bc_skip) {
1741         cli_errmsg("Loaded less functions than declared: %u vs. %u\n",
1742                    current_func, bc->num_func);
1743         return CL_EMALFDB;
1744     }
1745     return CL_SUCCESS;
1746 }
1747 
/*
 * Table of the events recorded for a single bytecode run. Each row holds the
 * event id, the name, the value type and the multiple_handling policy that
 * register_events() passes to cli_event_define().
 */
static struct {
    enum bc_events id;
    const char *name;
    enum ev_type type;
    enum multiple_handling multiple;
} bc_events[] = {
    {BCEV_VIRUSNAME, "virusname", ev_string, multiple_last},
    {BCEV_EXEC_RETURNVALUE, "returnvalue", ev_int, multiple_last},
    {BCEV_WRITE, "bcapi_write", ev_data_fast, multiple_sum},
    {BCEV_OFFSET, "read offset", ev_int, multiple_sum},
    {BCEV_READ, "read data", ev_data_fast, multiple_sum},
    //{BCEV_READ, "read data", ev_data, multiple_concat},
    {BCEV_DBG_STR, "debug message", ev_data_fast, multiple_sum},
    {BCEV_DBG_INT, "debug int", ev_int, multiple_sum},
    {BCEV_MEM_1, "memmem 1", ev_data_fast, multiple_sum},
    {BCEV_MEM_2, "memmem 2", ev_data_fast, multiple_sum},
    {BCEV_FIND, "find", ev_data_fast, multiple_sum},
    {BCEV_EXTRACTED, "extracted files", ev_int, multiple_sum},
    {BCEV_READ_ERR, "read errors", ev_int, multiple_sum},
    {BCEV_DISASM_FAIL, "disasm fails", ev_int, multiple_sum},
    {BCEV_EXEC_TIME, "bytecode execute", ev_time, multiple_sum}};
1769 
/**
 * Define every entry of the bc_events table on the given event set.
 *
 * @param ev Event set to populate.
 * @return 0 when all entries were defined, -1 as soon as one
 *         cli_event_define() call reports -1.
 */
static int register_events(cli_events_t *ev)
{
    const size_t total = sizeof(bc_events) / sizeof(bc_events[0]);
    size_t idx         = 0;

    while (idx < total) {
        int defined = cli_event_define(ev, bc_events[idx].id, bc_events[idx].name,
                                       bc_events[idx].type, bc_events[idx].multiple);
        if (defined == -1)
            return -1;
        idx++;
    }
    return 0;
}
1780 
cli_bytecode_run(const struct cli_all_bc * bcs,const struct cli_bc * bc,struct cli_bc_ctx * ctx)1781 int cli_bytecode_run(const struct cli_all_bc *bcs, const struct cli_bc *bc, struct cli_bc_ctx *ctx)
1782 {
1783     int ret = CL_SUCCESS;
1784     struct cli_bc_inst inst;
1785     struct cli_bc_func func;
1786     cli_events_t *jit_ev = NULL, *interp_ev = NULL;
1787 
1788     int test_mode = 0;
1789     cli_ctx *cctx = (cli_ctx *)ctx->ctx;
1790 
1791     if (!ctx || !ctx->bc || !ctx->func)
1792         return CL_ENULLARG;
1793     if (ctx->numParams && (!ctx->values || !ctx->operands))
1794         return CL_ENULLARG;
1795 
1796     if (cctx && cctx->engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
1797         test_mode = 1;
1798 
1799     if (bc->state == bc_loaded) {
1800         cli_errmsg("bytecode has to be prepared either for interpreter or JIT!\n");
1801         return CL_EARG;
1802     }
1803     if (bc->state == bc_disabled) {
1804         cli_dbgmsg("bytecode triggered but running bytecodes is disabled\n");
1805         return CL_SUCCESS;
1806     }
1807     if (cctx)
1808         cli_event_time_start(cctx->perf, PERFT_BYTECODE);
1809     ctx->env = &bcs->env;
1810     context_safe(ctx);
1811     if (test_mode) {
1812         jit_ev    = cli_events_new(BCEV_LASTEVENT);
1813         interp_ev = cli_events_new(BCEV_LASTEVENT);
1814         if (!jit_ev || !interp_ev) {
1815             cli_events_free(jit_ev);
1816             cli_events_free(interp_ev);
1817             return CL_EMEM;
1818         }
1819         if (register_events(jit_ev) == -1 ||
1820             register_events(interp_ev) == -1) {
1821             cli_events_free(jit_ev);
1822             cli_events_free(interp_ev);
1823             return CL_EBYTECODE_TESTFAIL;
1824         }
1825     }
1826     cli_event_time_start(g_sigevents, bc->sigtime_id);
1827     if (bc->state == bc_interp || test_mode) {
1828         ctx->bc_events = interp_ev;
1829         memset(&func, 0, sizeof(func));
1830         func.numInsts     = 1;
1831         func.numValues    = 1;
1832         func.numConstants = 0;
1833         func.numBytes     = ctx->bytes;
1834         memset(ctx->values + ctx->bytes - 8, 0, 8);
1835 
1836         inst.opcode        = OP_BC_CALL_DIRECT;
1837         inst.interp_op     = OP_BC_CALL_DIRECT * 5;
1838         inst.dest          = func.numArgs;
1839         inst.type          = 0;
1840         inst.u.ops.numOps  = ctx->numParams;
1841         inst.u.ops.funcid  = ctx->funcid;
1842         inst.u.ops.ops     = ctx->operands;
1843         inst.u.ops.opsizes = ctx->opsizes;
1844         cli_dbgmsg("Bytecode %u: executing in interpreter mode\n", bc->id);
1845 
1846         ctx->on_jit = 0;
1847 
1848         cli_event_time_start(interp_ev, BCEV_EXEC_TIME);
1849         ret = cli_vm_execute(ctx->bc, ctx, &func, &inst);
1850         cli_event_time_stop(interp_ev, BCEV_EXEC_TIME);
1851 
1852         cli_event_int(interp_ev, BCEV_EXEC_RETURNVALUE, ret);
1853         cli_event_string(interp_ev, BCEV_VIRUSNAME, ctx->virname);
1854 
1855         /* need to be called here to catch any extracted but not yet scanned files */
1856         if (ctx->outfd && (ret != CL_VIRUS || cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES))
1857             cli_bcapi_extract_new(ctx, -1);
1858     }
1859     if (bc->state == bc_jit || test_mode) {
1860         if (test_mode) {
1861             ctx->off = 0;
1862         }
1863         ctx->bc_events = jit_ev;
1864         cli_dbgmsg("Bytecode %u: executing in JIT mode\n", bc->id);
1865 
1866         ctx->on_jit = 1;
1867         cli_event_time_start(jit_ev, BCEV_EXEC_TIME);
1868         ret = cli_vm_execute_jit(bcs, ctx, &bc->funcs[ctx->funcid]);
1869         cli_event_time_stop(jit_ev, BCEV_EXEC_TIME);
1870 
1871         cli_event_int(jit_ev, BCEV_EXEC_RETURNVALUE, ret);
1872         cli_event_string(jit_ev, BCEV_VIRUSNAME, ctx->virname);
1873 
1874         /* need to be called here to catch any extracted but not yet scanned files */
1875         if (ctx->outfd && (ret != CL_VIRUS || cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES))
1876             cli_bcapi_extract_new(ctx, -1);
1877     }
1878     cli_event_time_stop(g_sigevents, bc->sigtime_id);
1879     if (ctx->virname)
1880         cli_event_count(g_sigevents, bc->sigmatch_id);
1881 
1882     if (test_mode) {
1883         unsigned interp_errors = cli_event_errors(interp_ev);
1884         unsigned jit_errors    = cli_event_errors(jit_ev);
1885         unsigned interp_warns = 0, jit_warns = 0;
1886         int ok = 1;
1887         enum bc_events evid;
1888 
1889         if (interp_errors || jit_errors) {
1890             cli_infomsg(cctx, "bytecode %d encountered %u JIT and %u interpreter errors\n",
1891                         bc->id, interp_errors, jit_errors);
1892             ok = 0;
1893         }
1894         if (!ctx->no_diff && cli_event_diff_all(interp_ev, jit_ev, NULL)) {
1895             cli_infomsg(cctx, "bytecode %d execution different with JIT and interpreter, see --debug for details\n",
1896                         bc->id);
1897             ok = 0;
1898         }
1899         for (evid = BCEV_API_WARN_BEGIN + 1; evid < BCEV_API_WARN_END; evid++) {
1900             union ev_val v;
1901             uint32_t count = 0;
1902             cli_event_get(interp_ev, evid, &v, &count);
1903             interp_warns += count;
1904             count = 0;
1905             cli_event_get(jit_ev, evid, &v, &count);
1906             jit_warns += count;
1907         }
1908         if (interp_warns || jit_warns) {
1909             cli_infomsg(cctx, "bytecode %d encountered %u JIT and %u interpreter warnings\n",
1910                         bc->id, interp_warns, jit_warns);
1911             ok = 0;
1912         }
1913         /*cli_event_debug(jit_ev, BCEV_EXEC_TIME);
1914         cli_event_debug(interp_ev, BCEV_EXEC_TIME);
1915 	cli_event_debug(g_sigevents, bc->sigtime_id);*/
1916         if (!ok) {
1917             cli_events_free(jit_ev);
1918             cli_events_free(interp_ev);
1919             return CL_EBYTECODE_TESTFAIL;
1920         }
1921     }
1922     cli_events_free(jit_ev);
1923     cli_events_free(interp_ev);
1924     if (cctx)
1925         cli_event_time_stop(cctx->perf, PERFT_BYTECODE);
1926     return ret;
1927 }
1928 
cli_bytecode_context_getresult_int(struct cli_bc_ctx * ctx)1929 uint64_t cli_bytecode_context_getresult_int(struct cli_bc_ctx *ctx)
1930 {
1931     return *(uint32_t *)ctx->values; /*XXX*/
1932 }
1933 
cli_bytecode_destroy(struct cli_bc * bc)1934 void cli_bytecode_destroy(struct cli_bc *bc)
1935 {
1936     unsigned i, j, k;
1937     free(bc->metadata.compiler);
1938     free(bc->metadata.sigmaker);
1939 
1940     if (bc->funcs) {
1941         for (i = 0; i < bc->num_func; i++) {
1942             struct cli_bc_func *f = &bc->funcs[i];
1943             if (!f)
1944                 continue;
1945             free(f->types);
1946 
1947             for (j = 0; j < f->numBB; j++) {
1948                 struct cli_bc_bb *BB = &f->BB[j];
1949                 for (k = 0; k < BB->numInsts; k++) {
1950                     struct cli_bc_inst *ii = &BB->insts[k];
1951                     if (operand_counts[ii->opcode] > 3 ||
1952                         ii->opcode == OP_BC_CALL_DIRECT || ii->opcode == OP_BC_CALL_API) {
1953                         free(ii->u.ops.ops);
1954                         free(ii->u.ops.opsizes);
1955                     }
1956                 }
1957             }
1958             free(f->BB);
1959             free(f->allinsts);
1960             free(f->constants);
1961         }
1962         free(bc->funcs);
1963     }
1964     if (bc->types) {
1965         for (i = NUM_STATIC_TYPES; i < bc->num_types; i++) {
1966             if (bc->types[i].containedTypes)
1967                 free(bc->types[i].containedTypes);
1968         }
1969         free(bc->types);
1970     }
1971 
1972     if (bc->globals) {
1973         for (i = 0; i < bc->num_globals; i++) {
1974             free(bc->globals[i]);
1975         }
1976         free(bc->globals);
1977     }
1978     if (bc->dbgnodes) {
1979         for (i = 0; i < bc->dbgnode_cnt; i++) {
1980             for (j = 0; j < bc->dbgnodes[i].numelements; j++) {
1981                 struct cli_bc_dbgnode_element *el = &bc->dbgnodes[i].elements[j];
1982                 if (el && el->string)
1983                     free(el->string);
1984             }
1985         }
1986         free(bc->dbgnodes);
1987     }
1988     free(bc->globaltys);
1989     if (bc->uses_apis)
1990         cli_bitset_free(bc->uses_apis);
1991     free(bc->lsig);
1992     free(bc->hook_name);
1993     free(bc->globalBytes);
1994     memset(bc, 0, sizeof(*bc));
1995 }
1996 
/*
 * MAP(val): rewrite the operand `val` in place.
 *
 * An operand with bit 31 set refers to a global: the low 31 bits index
 * gmap[], and the result is gmap[index] with bit 31 set again. Any other
 * operand indexes map[]. The macro expects `bc`, `gmap`, `map`, `totValues`,
 * `i` and `j` to exist in the expanding scope. On an out-of-range operand it
 * frees map and gmap and returns CL_EBYTECODE from the enclosing function.
 *
 * gmap[] holds exactly bc->num_globals entries, so an index equal to
 * num_globals is rejected as well.
 */
#define MAP(val)                                                                                                             \
    do {                                                                                                                     \
        operand_t o = (val);                                                                                                 \
        if (o & 0x80000000) {                                                                                                \
            o &= 0x7fffffff;                                                                                                 \
            if (o >= bc->num_globals) {                                                                                      \
                cli_errmsg("bytecode: global out of range: %u >= %u, for instruction %u in function %u\n",                   \
                           o, (unsigned)bc->num_globals, j, i);                                                              \
                free(map);                                                                                                   \
                free(gmap);                                                                                                  \
                return CL_EBYTECODE;                                                                                         \
            }                                                                                                                \
            (val) = 0x80000000 | gmap[o];                                                                                    \
            break;                                                                                                           \
        }                                                                                                                    \
        if (o >= totValues) {                                                                                                \
            cli_errmsg("bytecode: operand out of range: %u >= %u, for instruction %u in function %u\n", o, totValues, j, i); \
            free(map);                                                                                                       \
            free(gmap);                                                                                                      \
            return CL_EBYTECODE;                                                                                             \
        }                                                                                                                    \
        (val) = map[o];                                                                                                      \
    } while (0)
2020 
/*
 * MAPPTR(val): like MAP(val), except that a function value whose type entry
 * has the 0x8000 flag set is rewritten to map[val] with bit 30 (0x40000000)
 * set instead. Expects `bcfunc` and `map` (plus everything MAP needs) in the
 * expanding scope.
 */
#define MAPPTR(val)                                                             \
    {                                                                           \
        if ((val) >= bcfunc->numValues || !(bcfunc->types[(val)] & 0x8000)) {   \
            MAP(val);                                                           \
        } else {                                                                \
            (val) = map[(val)] | 0x40000000;                                    \
        }                                                                       \
    }
2028 
/**
 * Pack an id and an offset into one 64-bit value: the id occupies the upper
 * 32 bits and the offset the lower 32 bits.
 */
static inline int64_t ptr_compose(int32_t id, uint32_t offset)
{
    const uint64_t upper = (uint64_t)id << 32;

    return upper | offset;
}
2034 
get_geptypesize(const struct cli_bc * bc,uint16_t tid)2035 static inline int get_geptypesize(const struct cli_bc *bc, uint16_t tid)
2036 {
2037     const struct cli_bc_type *ty;
2038     if (tid >= bc->num_types + 65) {
2039         cli_errmsg("bytecode: typeid out of range %u >= %u\n", tid, bc->num_types);
2040         return -1;
2041     }
2042     if (tid <= 64) {
2043         cli_errmsg("bytecode: invalid type for gep (%u)\n", tid);
2044         return -1;
2045     }
2046     ty = &bc->types[tid - 65];
2047     if (ty->kind != DPointerType) {
2048         cli_errmsg("bytecode: invalid gep type, must be pointer: %u\n", tid);
2049         return -1;
2050     }
2051     return typesize(bc, ty->containedTypes[0]);
2052 }
2053 
/**
 * Resolve the index operand of a GEPZ instruction.
 *
 * `tid` must name a pointer to a non-integer type. When the pointee is a
 * (packed) struct, operand `op` must be one of the function's constants; its
 * low 32 bits hold an element index, which is replaced in place by the sum
 * of typesize() over all preceding struct elements.
 *
 * @param bc   Bytecode providing the type table.
 * @param func Function whose constants[] holds the index operand.
 * @param tid  Type id of the GEP (pointer) type.
 * @param op   Operand id of the index; constants start at func->numValues.
 *
 * @return  1 when the constant was rewritten to an offset,
 *          0 when the pointee is not a struct (nothing is changed),
 *         -1 on an invalid type id, a non-constant index operand, or an
 *            element index outside the struct.
 */
static int calc_gepz(struct cli_bc *bc, struct cli_bc_func *func, uint16_t tid, operand_t op)
{
    unsigned off = 0, i;
    uint32_t *gepoff;
    const struct cli_bc_type *ty;
    if (tid >= bc->num_types + 65) {
        cli_errmsg("bytecode: typeid out of range %u >= %u\n", tid, bc->num_types);
        return -1;
    }
    if (tid <= 65) {
        cli_errmsg("bytecode: invalid type for gep (%u)\n", tid);
        return -1;
    }
    ty = &bc->types[tid - 65];
    if (ty->kind != DPointerType || ty->containedTypes[0] < 65) {
        cli_errmsg("bytecode: invalid gep type, must be pointer to nonint: %u\n", tid);
        return -1;
    }
    ty = &bc->types[ty->containedTypes[0] - 65];
    if (ty->kind != DStructType && ty->kind != DPackedStructType)
        return 0;
    /* The index is read from and written back to constants[], so the
     * operand has to fall inside the function's constant range; anything
     * else would touch memory outside that array. */
    if (op < func->numValues || op - func->numValues >= func->numConstants) {
        cli_errmsg("bytecode: gep index operand is not a constant: %u\n", (unsigned)op);
        return -1;
    }
    gepoff = (uint32_t *)&func->constants[op - func->numValues];
    if (*gepoff >= ty->numElements) {
        cli_errmsg("bytecode: gep offset out of range: %u >= %u\n", (unsigned)*gepoff, (unsigned)ty->numElements);
        return -1;
    }
    for (i = 0; i < *gepoff; i++) {
        off += typesize(bc, ty->containedTypes[i]);
    }
    *gepoff = off;
    return 1;
}
2086 
cli_bytecode_prepare_interpreter(struct cli_bc * bc)2087 static int cli_bytecode_prepare_interpreter(struct cli_bc *bc)
2088 {
2089     unsigned i, j, k;
2090     uint64_t *gmap;
2091     unsigned bcglobalid = cli_apicall_maxglobal - _FIRST_GLOBAL + 2;
2092     int ret             = CL_SUCCESS;
2093     bc->numGlobalBytes  = 0;
2094     gmap                = cli_malloc(bc->num_globals * sizeof(*gmap));
2095     if (!gmap) {
2096         cli_errmsg("interpreter: Unable to allocate memory for global map: %zu\n", bc->num_globals * sizeof(*gmap));
2097         return CL_EMEM;
2098     }
2099     for (j = 0; j < bc->num_globals; j++) {
2100         uint16_t ty    = bc->globaltys[j];
2101         unsigned align = typealign(bc, ty);
2102         assert(align);
2103         bc->numGlobalBytes = (bc->numGlobalBytes + align - 1) & (~(align - 1));
2104         gmap[j]            = bc->numGlobalBytes;
2105         bc->numGlobalBytes += typesize(bc, ty);
2106     }
2107     if (bc->numGlobalBytes) {
2108         bc->globalBytes = cli_calloc(1, bc->numGlobalBytes);
2109         if (!bc->globalBytes) {
2110             cli_errmsg("interpreter: Unable to allocate memory for globalBytes: %u\n", bc->numGlobalBytes);
2111             free(gmap);
2112             return CL_EMEM;
2113         }
2114     } else
2115         bc->globalBytes = NULL;
2116 
2117     for (j = 0; j < bc->num_globals; j++) {
2118         struct cli_bc_type *ty;
2119         if (bc->globaltys[j] < 65)
2120             continue;
2121         ty = &bc->types[bc->globaltys[j] - 65];
2122         switch (ty->kind) {
2123             case DPointerType: {
2124                 uint64_t ptr;
2125                 if (bc->globals[j][1] >= _FIRST_GLOBAL) {
2126                     ptr = ptr_compose(bc->globals[j][1] - _FIRST_GLOBAL + 1,
2127                                       bc->globals[j][0]);
2128                 } else {
2129                     if (bc->globals[j][1] > bc->num_globals)
2130                         continue;
2131                     ptr = ptr_compose(bcglobalid,
2132                                       gmap[bc->globals[j][1]] + bc->globals[j][0]);
2133                 }
2134                 *(uint64_t *)&bc->globalBytes[gmap[j]] = ptr;
2135                 break;
2136             }
2137             case DArrayType: {
2138                 unsigned elsize, i, off = gmap[j];
2139                 /* TODO: support other than ints in arrays */
2140                 elsize = typesize(bc, ty->containedTypes[0]);
2141                 switch (elsize) {
2142                     case 1:
2143                         for (i = 0; i < ty->numElements; i++)
2144                             bc->globalBytes[off + i] = bc->globals[j][i];
2145                         break;
2146                     case 2:
2147                         for (i = 0; i < ty->numElements; i++)
2148                             *(uint16_t *)&bc->globalBytes[off + i * 2] = bc->globals[j][i];
2149                         break;
2150                     case 4:
2151                         for (i = 0; i < ty->numElements; i++)
2152                             *(uint32_t *)&bc->globalBytes[off + i * 4] = bc->globals[j][i];
2153                         break;
2154                     case 8:
2155                         for (i = 0; i < ty->numElements; i++)
2156                             *(uint64_t *)&bc->globalBytes[off + i * 8] = bc->globals[j][i];
2157                         break;
2158                     default:
2159                         cli_dbgmsg("interpreter: unsupported elsize: %u\n", elsize);
2160                 }
2161                 break;
2162             }
2163             default:
2164                 /*TODO*/
2165                 if (!bc->globals[j][1])
2166                     continue; /* null */
2167                 break;
2168         }
2169     }
2170 
2171     for (i = 0; i < bc->num_func && ret == CL_SUCCESS; i++) {
2172         struct cli_bc_func *bcfunc = &bc->funcs[i];
2173         unsigned totValues         = bcfunc->numValues + bcfunc->numConstants + bc->num_globals;
2174         unsigned *map              = cli_malloc(sizeof(*map) * (size_t)totValues);
2175         if (!map) {
2176             cli_errmsg("interpreter: Unable to allocate memory for map: %zu\n", sizeof(*map) * (size_t)totValues);
2177             free(gmap);
2178             return CL_EMEM;
2179         }
2180         bcfunc->numBytes = 0;
2181         for (j = 0; j < bcfunc->numValues; j++) {
2182             uint16_t ty = bcfunc->types[j];
2183             unsigned align;
2184             align = typealign(bc, ty);
2185             assert(!ty || typesize(bc, ty));
2186             assert(align);
2187             bcfunc->numBytes = (bcfunc->numBytes + align - 1) & (~(align - 1));
2188             map[j]           = bcfunc->numBytes;
2189             /* printf("%d -> %d, %u\n", j, map[j], typesize(bc, ty)); */
2190             bcfunc->numBytes += typesize(bc, ty);
2191             /* TODO: don't allow size 0, it is always a bug! */
2192         }
2193         bcfunc->numBytes = (bcfunc->numBytes + 7) & ~7;
2194         for (j = 0; j < bcfunc->numConstants; j++) {
2195             map[bcfunc->numValues + j] = bcfunc->numBytes;
2196             bcfunc->numBytes += 8;
2197         }
2198         for (j = 0; j < bcfunc->numInsts && ret == CL_SUCCESS; j++) {
2199             struct cli_bc_inst *inst = &bcfunc->allinsts[j];
2200             inst->dest               = map[inst->dest];
2201             switch (inst->opcode) {
2202                 case OP_BC_ADD:
2203                 case OP_BC_SUB:
2204                 case OP_BC_MUL:
2205                 case OP_BC_UDIV:
2206                 case OP_BC_SDIV:
2207                 case OP_BC_UREM:
2208                 case OP_BC_SREM:
2209                 case OP_BC_SHL:
2210                 case OP_BC_LSHR:
2211                 case OP_BC_ASHR:
2212                 case OP_BC_AND:
2213                 case OP_BC_OR:
2214                 case OP_BC_XOR:
2215                 case OP_BC_ICMP_EQ:
2216                 case OP_BC_ICMP_NE:
2217                 case OP_BC_ICMP_UGT:
2218                 case OP_BC_ICMP_UGE:
2219                 case OP_BC_ICMP_ULT:
2220                 case OP_BC_ICMP_ULE:
2221                 case OP_BC_ICMP_SGT:
2222                 case OP_BC_ICMP_SGE:
2223                 case OP_BC_ICMP_SLT:
2224                 case OP_BC_ICMP_SLE:
2225                 case OP_BC_COPY:
2226                 case OP_BC_STORE:
2227                     MAP(inst->u.binop[0]);
2228                     MAP(inst->u.binop[1]);
2229                     break;
2230                 case OP_BC_SEXT:
2231                 case OP_BC_ZEXT:
2232                 case OP_BC_TRUNC:
2233                     MAP(inst->u.cast.source);
2234                     break;
2235                 case OP_BC_BRANCH:
2236                     MAP(inst->u.branch.condition);
2237                     break;
2238                 case OP_BC_JMP:
2239                     break;
2240                 case OP_BC_RET:
2241                     MAP(inst->u.unaryop);
2242                     break;
2243                 case OP_BC_SELECT:
2244                     MAP(inst->u.three[0]);
2245                     MAP(inst->u.three[1]);
2246                     MAP(inst->u.three[2]);
2247                     break;
2248                 case OP_BC_CALL_API: /* fall-through */
2249                 case OP_BC_CALL_DIRECT: {
2250                     struct cli_bc_func *target = NULL;
2251                     if (inst->opcode == OP_BC_CALL_DIRECT) {
2252                         target = &bc->funcs[inst->u.ops.funcid];
2253                         if (inst->u.ops.funcid > bc->num_func) {
2254                             cli_errmsg("bytecode: called function out of range: %u > %u\n", inst->u.ops.funcid, bc->num_func);
2255                             ret = CL_EBYTECODE;
2256                         } else if (inst->u.ops.numOps != target->numArgs) {
2257                             cli_errmsg("bytecode: call operands don't match function prototype\n");
2258                             ret = CL_EBYTECODE;
2259                         }
2260                     } else {
2261                         /* APIs have at most 2 parameters always */
2262                         if (inst->u.ops.numOps > 5) {
2263                             cli_errmsg("bytecode: call operands don't match function prototype\n");
2264                             ret = CL_EBYTECODE;
2265                         }
2266                     }
2267                     if (ret != CL_SUCCESS)
2268                         break;
2269                     if (inst->u.ops.numOps > 0) {
2270                         inst->u.ops.opsizes = cli_malloc(sizeof(*inst->u.ops.opsizes) * inst->u.ops.numOps);
2271                         if (!inst->u.ops.opsizes) {
2272                             cli_errmsg("Out of memory when allocating operand sizes\n");
2273                             ret = CL_EMEM;
2274                             break;
2275                         }
2276                     } else {
2277                         inst->u.ops.opsizes = NULL;
2278                         break;
2279                     }
2280                     for (k = 0; k < inst->u.ops.numOps; k++) {
2281                         MAPPTR(inst->u.ops.ops[k]);
2282                         if (inst->opcode == OP_BC_CALL_DIRECT)
2283                             inst->u.ops.opsizes[k] = typesize(bc, target->types[k]);
2284                         else
2285                             inst->u.ops.opsizes[k] = 32; /*XXX*/
2286                     }
2287                     break;
2288                 }
2289                 case OP_BC_LOAD:
2290                     MAPPTR(inst->u.unaryop);
2291                     break;
2292                 case OP_BC_GEP1:
2293                     if (inst->u.three[1] & 0x80000000 ||
2294                         bcfunc->types[inst->u.binop[1]] & 0x8000) {
2295                         cli_errmsg("bytecode: gep1 of alloca is not allowed\n");
2296                         ret = CL_EBYTECODE;
2297                     }
2298                     if (ret != CL_SUCCESS)
2299                         break;
2300                     MAP(inst->u.three[1]);
2301                     MAP(inst->u.three[2]);
2302                     inst->u.three[0] = get_geptypesize(bc, inst->u.three[0]);
2303                     if ((int)(inst->u.three[0]) == -1)
2304                         ret = CL_EBYTECODE;
2305                     break;
2306                 case OP_BC_GEPZ:
2307                     /*three[0] is the type*/
2308                     if (inst->u.three[1] & 0x80000000 ||
2309                         bcfunc->types[inst->u.three[1]] & 0x8000)
2310                         inst->interp_op = 5 * (inst->interp_op / 5);
2311                     else
2312                         inst->interp_op = 5 * (inst->interp_op / 5) + 3;
2313                     MAP(inst->u.three[1]);
2314                     if (calc_gepz(bc, bcfunc, inst->u.three[0], inst->u.three[2]) == -1)
2315                         ret = CL_EBYTECODE;
2316                     if (ret == CL_SUCCESS)
2317                         MAP(inst->u.three[2]);
2318                     break;
2319                     /*		case OP_BC_GEPN:
2320 		    *TODO
2321 		    break;*/
2322                 case OP_BC_MEMSET:
2323                 case OP_BC_MEMCPY:
2324                 case OP_BC_MEMMOVE:
2325                 case OP_BC_MEMCMP:
2326                     MAPPTR(inst->u.three[0]);
2327                     MAPPTR(inst->u.three[1]);
2328                     MAP(inst->u.three[2]);
2329                     break;
2330                 case OP_BC_RET_VOID:
2331                 case OP_BC_ISBIGENDIAN:
2332                 case OP_BC_ABORT:
2333                     /* no operands */
2334                     break;
2335                 case OP_BC_BSWAP16:
2336                 case OP_BC_BSWAP32:
2337                 case OP_BC_BSWAP64:
2338                     MAP(inst->u.unaryop);
2339                     break;
2340                 case OP_BC_PTRDIFF32:
2341                     MAPPTR(inst->u.binop[0]);
2342                     MAPPTR(inst->u.binop[1]);
2343                     break;
2344                 case OP_BC_PTRTOINT64:
2345                     MAPPTR(inst->u.unaryop);
2346                     break;
2347                 default:
2348                     cli_warnmsg("Bytecode: unhandled opcode: %d\n", inst->opcode);
2349                     ret = CL_EBYTECODE;
2350             }
2351         }
2352         if (map)
2353             free(map);
2354     }
2355     free(gmap);
2356     bc->state = bc_interp;
2357     return ret;
2358 }
2359 
add_selfcheck(struct cli_all_bc * bcs)2360 static int add_selfcheck(struct cli_all_bc *bcs)
2361 {
2362     struct cli_bc_func *func;
2363     struct cli_bc_inst *inst;
2364     struct cli_bc *bc;
2365 
2366     bcs->all_bcs = cli_realloc2(bcs->all_bcs, sizeof(*bcs->all_bcs) * (bcs->count + 1));
2367     if (!bcs->all_bcs) {
2368         cli_errmsg("cli_loadcbc: Can't allocate memory for bytecode entry\n");
2369         return CL_EMEM;
2370     }
2371     bc = &bcs->all_bcs[bcs->count++];
2372     memset(bc, 0, sizeof(*bc));
2373 
2374     bc->trusted     = 1;
2375     bc->num_globals = 1;
2376     bc->globals     = cli_calloc(1, sizeof(*bc->globals));
2377     if (!bc->globals) {
2378         cli_errmsg("Failed to allocate memory for globals\n");
2379         return CL_EMEM;
2380     }
2381     bc->globals[0] = cli_calloc(1, sizeof(*bc->globals[0]));
2382     if (!bc->globals[0]) {
2383         cli_errmsg("Failed to allocate memory for globals\n");
2384         return CL_EMEM;
2385     }
2386     bc->globaltys = cli_calloc(1, sizeof(*bc->globaltys));
2387     if (!bc->globaltys) {
2388         cli_errmsg("Failed to allocate memory for globaltypes\n");
2389         return CL_EMEM;
2390     }
2391     bc->globaltys[0] = 32;
2392     *bc->globals[0]  = 0;
2393     bc->id           = ~0;
2394     bc->kind         = 0;
2395     bc->num_types    = 5;
2396     bc->num_func     = 1;
2397     bc->funcs        = cli_calloc(1, sizeof(*bc->funcs));
2398     if (!bc->funcs) {
2399         cli_errmsg("Failed to allocate memory for func\n");
2400         return CL_EMEM;
2401     }
2402     func               = bc->funcs;
2403     func->numInsts     = 2;
2404     func->numLocals    = 1;
2405     func->numValues    = 1;
2406     func->numConstants = 1;
2407     func->numBB        = 1;
2408     func->returnType   = 32;
2409     func->types        = cli_calloc(1, sizeof(*func->types));
2410     if (!func->types) {
2411         cli_errmsg("Failed to allocate memory for types\n");
2412         return CL_EMEM;
2413     }
2414     func->types[0] = 32;
2415     func->BB       = cli_calloc(1, sizeof(*func->BB));
2416     if (!func->BB) {
2417         cli_errmsg("Failed to allocate memory for BB\n");
2418         return CL_EMEM;
2419     }
2420     func->allinsts = cli_calloc(2, sizeof(*func->allinsts));
2421     if (!func->allinsts) {
2422         cli_errmsg("Failed to allocate memory for insts\n");
2423         return CL_EMEM;
2424     }
2425     func->BB->numInsts = 2;
2426     func->BB->insts    = func->allinsts;
2427     func->constants    = cli_calloc(1, sizeof(*func->constants));
2428     if (!func->constants) {
2429         cli_errmsg("Failed to allocate memory for constants\n");
2430         return CL_EMEM;
2431     }
2432     func->constants[0] = 0xf00d;
2433     inst               = func->allinsts;
2434 
2435     inst->opcode        = OP_BC_CALL_API;
2436     inst->u.ops.numOps  = 1;
2437     inst->u.ops.opsizes = NULL;
2438     inst->u.ops.ops     = cli_calloc(1, sizeof(*inst->u.ops.ops));
2439     if (!inst->u.ops.ops) {
2440         cli_errmsg("Failed to allocate memory for instructions\n");
2441         return CL_EMEM;
2442     }
2443     inst->u.ops.ops[0] = 1;
2444     inst->u.ops.funcid = 18; /* test2 */
2445     inst->dest         = 0;
2446     inst->type         = 32;
2447     inst->interp_op    = inst->opcode * 5 + 3;
2448 
2449     inst            = &func->allinsts[1];
2450     inst->opcode    = OP_BC_RET;
2451     inst->type      = 32;
2452     inst->u.unaryop = 0;
2453     inst->interp_op = inst->opcode * 5;
2454 
2455     bc->state = bc_loaded;
2456     return 0;
2457 }
2458 
run_selfcheck(struct cli_all_bc * bcs)2459 static int run_selfcheck(struct cli_all_bc *bcs)
2460 {
2461     struct cli_bc_ctx *ctx;
2462     struct cli_bc *bc = &bcs->all_bcs[bcs->count - 1];
2463     int rc;
2464     if (bc->state != bc_jit && bc->state != bc_interp) {
2465         cli_errmsg("Failed to prepare selfcheck bytecode\n");
2466         return CL_EBYTECODE;
2467     }
2468     ctx = cli_bytecode_context_alloc();
2469     if (!ctx) {
2470         cli_errmsg("Failed to allocate bytecode context\n");
2471         return CL_EMEM;
2472     }
2473     cli_bytecode_context_setfuncid(ctx, bc, 0);
2474 
2475     cli_dbgmsg("bytecode self test running\n");
2476     ctx->bytecode_timeout = 0;
2477     rc                    = cli_bytecode_run(bcs, bc, ctx);
2478     cli_bytecode_context_destroy(ctx);
2479     if (rc != CL_SUCCESS) {
2480         cli_errmsg("bytecode self test failed: %s\n",
2481                    cl_strerror(rc));
2482     } else {
2483         cli_dbgmsg("bytecode self test succeeded\n");
2484     }
2485     return rc;
2486 }
2487 
selfcheck(int jit,struct cli_bcengine * engine)2488 static int selfcheck(int jit, struct cli_bcengine *engine)
2489 {
2490     struct cli_all_bc bcs;
2491     int rc;
2492 
2493     memset(&bcs, 0, sizeof(bcs));
2494     bcs.all_bcs = NULL;
2495     bcs.count   = 0;
2496     bcs.engine  = engine;
2497     rc          = add_selfcheck(&bcs);
2498     if (rc == CL_SUCCESS) {
2499         if (jit) {
2500             if (!bcs.engine) {
2501                 cli_dbgmsg("bytecode: JIT disabled\n");
2502                 rc = CL_BREAK; /* no JIT - not fatal */
2503             } else {
2504                 rc = cli_bytecode_prepare_jit(&bcs);
2505             }
2506         } else {
2507             rc = cli_bytecode_prepare_interpreter(bcs.all_bcs);
2508         }
2509         if (rc == CL_SUCCESS)
2510             rc = run_selfcheck(&bcs);
2511         if (rc == CL_BREAK)
2512             rc = CL_SUCCESS;
2513     }
2514     cli_bytecode_destroy(bcs.all_bcs);
2515     free(bcs.all_bcs);
2516     cli_bytecode_done_jit(&bcs, 1);
2517     if (rc != CL_SUCCESS) {
2518         cli_errmsg("Bytecode: failed to run selfcheck in %s mode: %s\n",
2519                    jit ? "JIT" : "interpreter", cl_strerror(rc));
2520     }
2521     return rc;
2522 }
2523 
set_mode(struct cl_engine * engine,enum bytecode_mode mode)2524 static int set_mode(struct cl_engine *engine, enum bytecode_mode mode)
2525 {
2526     if (engine->bytecode_mode == mode)
2527         return 0;
2528     if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2529         cli_errmsg("bytecode: already turned off, can't turn it on again!\n");
2530         return -1;
2531     }
2532     cli_dbgmsg("Bytecode: mode changed to %d\n", mode);
2533     if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST) {
2534         if (mode == CL_BYTECODE_MODE_OFF || have_clamjit) {
2535             cli_errmsg("bytecode: in test mode but JIT/bytecode is about to be disabled: %d\n", mode);
2536             engine->bytecode_mode = mode;
2537             return -1;
2538         }
2539         return 0;
2540     }
2541     if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2542         cli_errmsg("bytecode: in JIT mode but JIT is about to be disabled: %d\n", mode);
2543         engine->bytecode_mode = mode;
2544         return -1;
2545     }
2546     engine->bytecode_mode = mode;
2547     return 0;
2548 }
2549 
2550 /* runs the first bytecode of the specified kind, or the builtin one if no
2551  * bytecode of that kind is loaded */
run_builtin_or_loaded(struct cli_all_bc * bcs,uint8_t kind,const char * builtin_cbc,struct cli_bc_ctx * ctx,const char * desc)2552 static int run_builtin_or_loaded(struct cli_all_bc *bcs, uint8_t kind, const char *builtin_cbc, struct cli_bc_ctx *ctx, const char *desc)
2553 {
2554     unsigned i, builtin = 0, rc = 0;
2555     struct cli_bc *bc = NULL;
2556 
2557     for (i = 0; i < bcs->count; i++) {
2558         bc = &bcs->all_bcs[i];
2559         if (bc->kind == kind)
2560             break;
2561     }
2562     if (i == bcs->count)
2563         bc = NULL;
2564     if (!bc) {
2565         /* no loaded bytecode found, load the builtin one! */
2566         struct cli_dbio dbio;
2567         bc = cli_calloc(1, sizeof(*bc));
2568         if (!bc) {
2569             cli_errmsg("Out of memory allocating bytecode\n");
2570             return CL_EMEM;
2571         }
2572         builtin = 1;
2573 
2574         memset(&dbio, 0, sizeof(dbio));
2575         dbio.usebuf = 1;
2576         dbio.bufpt = dbio.buf = (char *)builtin_cbc;
2577         dbio.bufsize          = strlen(builtin_cbc) + 1;
2578         if (!dbio.bufsize || dbio.bufpt[dbio.bufsize - 2] != '\n') {
2579             cli_errmsg("Invalid builtin bytecode: missing terminator\n");
2580             free(bc);
2581             return CL_EMALFDB;
2582         }
2583 
2584         rc = cli_bytecode_load(bc, NULL, &dbio, 1, 0);
2585         if (rc) {
2586             cli_errmsg("Failed to load builtin %s bytecode\n", desc);
2587             free(bc);
2588             return rc;
2589         }
2590     }
2591     rc = cli_bytecode_prepare_interpreter(bc);
2592     if (rc) {
2593         cli_errmsg("Failed to prepare %s %s bytecode for interpreter: %s\n",
2594                    builtin ? "builtin" : "loaded", desc, cl_strerror(rc));
2595     }
2596     if (bc->state != bc_interp) {
2597         cli_errmsg("Failed to prepare %s %s bytecode for interpreter\n",
2598                    builtin ? "builtin" : "loaded", desc);
2599         rc = CL_EMALFDB;
2600     }
2601     if (!rc) {
2602         cli_bytecode_context_setfuncid(ctx, bc, 0);
2603         cli_dbgmsg("Bytecode: %s running (%s)\n", desc,
2604                    builtin ? "builtin" : "loaded");
2605         rc = cli_bytecode_run(bcs, bc, ctx);
2606     }
2607     if (rc) {
2608         cli_errmsg("Failed to execute %s %s bytecode: %s\n", builtin ? "builtin" : "loaded",
2609                    desc, cl_strerror(rc));
2610     }
2611     if (builtin) {
2612         cli_bytecode_destroy(bc);
2613         free(bc);
2614     }
2615     return rc;
2616 }
2617 
cli_bytecode_prepare2(struct cl_engine * engine,struct cli_all_bc * bcs,unsigned dconfmask)2618 int cli_bytecode_prepare2(struct cl_engine *engine, struct cli_all_bc *bcs, unsigned dconfmask)
2619 {
2620     unsigned i, interp = 0, jitok = 0, jitcount = 0;
2621     int rc;
2622     struct cli_bc_ctx *ctx;
2623 
2624     if (!bcs->count) {
2625         cli_dbgmsg("No bytecodes loaded, not running builtin test\n");
2626         return CL_SUCCESS;
2627     }
2628 
2629     engine->bytecode_mode = CL_BYTECODE_MODE_AUTO;
2630     cli_detect_environment(&bcs->env);
2631     switch (bcs->env.arch) {
2632         case arch_i386:
2633         case arch_x86_64:
2634             if (!(dconfmask & BYTECODE_JIT_X86)) {
2635                 cli_dbgmsg("Bytecode: disabled on X86 via DCONF\n");
2636                 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2637                     return CL_EBYTECODE_TESTFAIL;
2638             }
2639             break;
2640         case arch_ppc32:
2641         case arch_ppc64:
2642             if (!(dconfmask & BYTECODE_JIT_PPC)) {
2643                 cli_dbgmsg("Bytecode: disabled on PPC via DCONF\n");
2644                 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2645                     return CL_EBYTECODE_TESTFAIL;
2646             }
2647             break;
2648         case arch_arm:
2649             if (!(dconfmask & BYTECODE_JIT_ARM)) {
2650                 cli_dbgmsg("Bytecode: disabled on ARM via DCONF\n");
2651                 if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2652                     return CL_EBYTECODE_TESTFAIL;
2653             }
2654             break;
2655         default:
2656             cli_dbgmsg("Bytecode: JIT not supported on this architecture, falling back\n");
2657             if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2658                 return CL_EBYTECODE_TESTFAIL;
2659             break;
2660     }
2661     cli_dbgmsg("Bytecode: mode is %d\n", engine->bytecode_mode);
2662 
2663     ctx = cli_bytecode_context_alloc();
2664     if (!ctx) {
2665         cli_errmsg("Bytecode: failed to allocate bytecode context\n");
2666         return CL_EMEM;
2667     }
2668     rc = run_builtin_or_loaded(bcs, BC_STARTUP, builtin_bc_startup, ctx, "BC_STARTUP");
2669     if (rc != CL_SUCCESS) {
2670         cli_warnmsg("Bytecode: BC_STARTUP failed to run, disabling ALL bytecodes! Please report to https://github.com/Cisco-Talos/clamav/issues\n");
2671         ctx->bytecode_disable_status = 2;
2672     } else {
2673         cli_dbgmsg("Bytecode: disable status is %d\n", ctx->bytecode_disable_status);
2674         rc = cli_bytecode_context_getresult_int(ctx);
2675         /* check magic number, don't use 0 here because it is too easy for a
2676 	 * buggy bytecode to return 0 */
2677         if ((unsigned int)rc != (unsigned int)0xda7aba5e) {
2678             cli_warnmsg("Bytecode: selftest failed with code %08x. Please report to https://github.com/Cisco-Talos/clamav/issues\n",
2679                         rc);
2680             if (engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
2681                 return CL_EBYTECODE_TESTFAIL;
2682         }
2683     }
2684     switch (ctx->bytecode_disable_status) {
2685         case 1:
2686             if (set_mode(engine, CL_BYTECODE_MODE_INTERPRETER) == -1)
2687                 return CL_EBYTECODE_TESTFAIL;
2688             break;
2689         case 2:
2690             if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2691                 return CL_EBYTECODE_TESTFAIL;
2692             break;
2693         default:
2694             break;
2695     }
2696     cli_bytecode_context_destroy(ctx);
2697 
2698     if (engine->bytecode_mode != CL_BYTECODE_MODE_INTERPRETER &&
2699         engine->bytecode_mode != CL_BYTECODE_MODE_OFF) {
2700         selfcheck(1, bcs->engine);
2701         rc = cli_bytecode_prepare_jit(bcs);
2702         if (rc == CL_SUCCESS) {
2703             jitok = 1;
2704             cli_dbgmsg("Bytecode: %u bytecode prepared with JIT\n", bcs->count);
2705             if (engine->bytecode_mode != CL_BYTECODE_MODE_TEST)
2706                 return CL_SUCCESS;
2707         }
2708         if (engine->bytecode_mode == CL_BYTECODE_MODE_JIT) {
2709             cli_errmsg("Bytecode: JIT required, but not all bytecodes could be prepared with JIT\n");
2710             return CL_EMALFDB;
2711         }
2712         if (rc && engine->bytecode_mode == CL_BYTECODE_MODE_TEST) {
2713             cli_errmsg("Bytecode: Test mode, but not all bytecodes could be prepared with JIT\n");
2714             return CL_EBYTECODE_TESTFAIL;
2715         }
2716     } else {
2717         cli_bytecode_done_jit(bcs, 0);
2718     }
2719 
2720     if (!(dconfmask & BYTECODE_INTERPRETER)) {
2721         cli_dbgmsg("Bytecode: needs interpreter, but interpreter is disabled\n");
2722         if (set_mode(engine, CL_BYTECODE_MODE_OFF) == -1)
2723             return CL_EBYTECODE_TESTFAIL;
2724     }
2725 
2726     if (engine->bytecode_mode == CL_BYTECODE_MODE_OFF) {
2727         for (i = 0; i < bcs->count; i++)
2728             bcs->all_bcs[i].state = bc_disabled;
2729         cli_dbgmsg("Bytecode: ALL bytecodes disabled\n");
2730         return CL_SUCCESS;
2731     }
2732 
2733     for (i = 0; i < bcs->count; i++) {
2734         struct cli_bc *bc = &bcs->all_bcs[i];
2735         if (bc->state == bc_jit) {
2736             jitcount++;
2737             if (engine->bytecode_mode != CL_BYTECODE_MODE_TEST)
2738                 continue;
2739         }
2740         if (bc->state == bc_interp) {
2741             interp++;
2742             continue;
2743         }
2744         rc = cli_bytecode_prepare_interpreter(bc);
2745         if (rc != CL_SUCCESS) {
2746             bc->state = bc_disabled;
2747             cli_warnmsg("Bytecode: %d failed to prepare for interpreter mode\n", bc->id);
2748             return rc;
2749         }
2750         interp++;
2751     }
2752     cli_dbgmsg("Bytecode: %u bytecode prepared with JIT, "
2753                "%u prepared with interpreter, %u total\n",
2754                jitcount, interp, bcs->count);
2755     return CL_SUCCESS;
2756 }
2757 
cli_bytecode_init(struct cli_all_bc * allbc)2758 int cli_bytecode_init(struct cli_all_bc *allbc)
2759 {
2760     int ret;
2761     memset(allbc, 0, sizeof(*allbc));
2762     ret = cli_bytecode_init_jit(allbc, 0 /*XXX*/);
2763     cli_dbgmsg("Bytecode initialized in %s mode\n",
2764                allbc->engine ? "JIT" : "interpreter");
2765     allbc->inited = 1;
2766     return ret;
2767 }
2768 
/*
 * Releases the JIT side of `allbc` by delegating to cli_bytecode_done_jit()
 * with its second argument set to 0, and returns that call's result.
 */
int cli_bytecode_done(struct cli_all_bc *allbc)
{
    const int jit_rc = cli_bytecode_done_jit(allbc, 0);

    return jit_rc;
}
2773 
/*
 * Attaches the file map `map` to the bytecode context: stores the map, caches
 * its length in ctx->file_size and points the filesize hook at that cached
 * value. Always returns 0.
 */
int cli_bytecode_context_setfile(struct cli_bc_ctx *ctx, fmap_t *map)
{
    /* the hook refers to the context's own copy of the size */
    ctx->hooks.filesize = &ctx->file_size;
    ctx->file_size      = map->len;
    ctx->fmap           = map;

    return 0;
}
2781 
/*
 * Runs the bytecode attached to a logical signature that just matched.
 *
 * cctx       - scan context
 * tinfo      - target info; when present with status 1 its executable
 *              details are exposed to the bytecode as PE hook data
 * bcs        - all loaded bytecodes
 * bc_idx     - 1-based index of the bytecode in bcs->all_bcs; 0 is invalid
 * lsigcnt    - match counts of the logical signature
 * lsigsuboff - match offsets of the logical signature
 * map        - file map being scanned
 *
 * If the bytecode belongs to a hook it is not run here; the match is only
 * recorded in cctx->hook_lsig_matches so the hook can run it later.
 *
 * Returns CL_ENULLARG for bc_idx 0. When the bytecode reports a virus name
 * the result of appending it is returned if no virus has been recorded yet
 * in cctx, otherwise CL_VIRUS. In every other case, including a bytecode
 * that failed to run, CL_SUCCESS is returned.
 */
int cli_bytecode_runlsig(cli_ctx *cctx, struct cli_target_info *tinfo,
                         const struct cli_all_bc *bcs, unsigned bc_idx,
                         const uint32_t *lsigcnt,
                         const uint32_t *lsigsuboff, fmap_t *map)
{
    int ret;
    struct cli_bc_ctx ctx;
    const struct cli_bc *bc;
    struct cli_pe_hook_data pehookdata;

    if (bc_idx == 0)
        return CL_ENULLARG;

    /* bc_idx is 1-based; look the entry up only after it has been validated
     * so that bc_idx - 1 can never wrap around */
    bc = &bcs->all_bcs[bc_idx - 1];

    memset(&ctx, 0, sizeof(ctx));
    cli_bytecode_context_setfuncid(&ctx, bc, 0);
    ctx.hooks.match_counts  = lsigcnt;
    ctx.hooks.match_offsets = lsigsuboff;
    cli_bytecode_context_setctx(&ctx, cctx);
    cli_bytecode_context_setfile(&ctx, map);
    if (tinfo && tinfo->status == 1) {
        ctx.sections = tinfo->exeinfo.sections;
        memset(&pehookdata, 0, sizeof(pehookdata));
        pehookdata.offset    = tinfo->exeinfo.offset;
        pehookdata.ep        = tinfo->exeinfo.ep;
        pehookdata.nsections = tinfo->exeinfo.nsections;
        pehookdata.hdr_size  = tinfo->exeinfo.hdr_size;
        ctx.hooks.pedata     = &pehookdata;
        ctx.resaddr          = tinfo->exeinfo.res_addr;
    }
    if (bc->hook_lsig_id) {
        cli_dbgmsg("hook lsig id %d matched (bc %d)\n", bc->hook_lsig_id, bc->id);
        /* this is a bytecode for a hook, defer running it until hook is
         * executed, so that it has all the info for the hook */
        if (cctx->hook_lsig_matches)
            cli_bitset_set(cctx->hook_lsig_matches, bc->hook_lsig_id - 1);
        /* save match counts */
        memcpy(&ctx.lsigcnt, lsigcnt, 64 * 4);
        memcpy(&ctx.lsigoff, lsigsuboff, 64 * 4);
        cli_bytecode_context_clear(&ctx);
        return CL_SUCCESS;
    }

    cli_dbgmsg("Running bytecode for logical signature match\n");
    ret = cli_bytecode_run(bcs, bc, &ctx);
    if (ret != CL_SUCCESS) {
        /* a failing bytecode is logged but does not fail the scan */
        cli_warnmsg("Bytecode %u failed to run: %s\n", bc->id, cl_strerror(ret));
        cli_bytecode_context_clear(&ctx);
        return CL_SUCCESS;
    }
    if (ctx.virname) {
        if (cctx->num_viruses == 0) {
            int rc;
            cli_dbgmsg("Bytecode found virus: %s\n", ctx.virname);
            /* names starting with "BC.Heuristics" are reported as possibly
             * unwanted rather than as a plain detection */
            if (!strncmp(ctx.virname, "BC.Heuristics", 13))
                rc = cli_append_possibly_unwanted(cctx, ctx.virname);
            else
                rc = cli_append_virus(cctx, ctx.virname);
            cli_bytecode_context_clear(&ctx);
            return rc;
        } else {
            cli_bytecode_context_clear(&ctx);
            return CL_VIRUS;
        }
    }
    ret = cli_bytecode_context_getresult_int(&ctx);
    cli_dbgmsg("Bytecode %u returned code: %u\n", bc->id, ret);
    cli_bytecode_context_clear(&ctx);
    return CL_SUCCESS;
}
2851 
/*
 * Runs every bytecode registered for hook `id` against the file in `map`.
 *
 * cctx   - scan context; CL_ENULLARG is returned when it is NULL
 * engine - engine holding the hook tables and the loaded bytecodes
 * ctx    - bytecode execution context, reused for every bytecode of the hook
 * id     - hook id; the tables are indexed with id - _BC_START_HOOKS
 * map    - file map handed to the bytecodes
 *
 * Returns CL_VIRUS as soon as a bytecode (or the scan of a file it produced)
 * reports a virus and CL_SCAN_GENERAL_ALLMATCHES is not set. Otherwise
 * returns CL_EBYTECODE_TESTFAIL if a bytecode failed to run and the engine is
 * in test mode, CL_BREAK if a bytecode returned the break value 0xcea5e, and
 * CL_CLEAN in all remaining cases.
 */
int cli_bytecode_runhook(cli_ctx *cctx, const struct cl_engine *engine, struct cli_bc_ctx *ctx,
                         unsigned id, fmap_t *map)
{
    const unsigned *hooks = engine->hooks[id - _BC_START_HOOKS];
    unsigned i, hooks_cnt = engine->hooks_cnt[id - _BC_START_HOOKS];
    int ret;
    unsigned executed = 0, breakflag = 0, errorflag = 0;

    if (!cctx)
        return CL_ENULLARG;

    cli_dbgmsg("Bytecode executing hook id %u (%u hooks)\n", id, hooks_cnt);
    /* restore match counts */
    cli_bytecode_context_setfile(ctx, map);
    ctx->hooks.match_counts  = ctx->lsigcnt;
    ctx->hooks.match_offsets = ctx->lsigoff;
    for (i = 0; i < hooks_cnt; i++) {
        /* hooks[] holds indices into the engine's bytecode array */
        const struct cli_bc *bc = &engine->bcs.all_bcs[hooks[i]];
        if (bc->lsig) {
            /* a bytecode with a logical signature only runs if that
             * signature was recorded as matched for this scan */
            if (!cctx->hook_lsig_matches ||
                !cli_bitset_test(cctx->hook_lsig_matches, bc->hook_lsig_id - 1))
                continue;
            cli_dbgmsg("Bytecode: executing bytecode %u (lsig matched)\n", bc->id);
        }
        cli_bytecode_context_setfuncid(ctx, bc, 0);
        ret = cli_bytecode_run(&engine->bcs, bc, ctx);
        executed++;
        if (ret != CL_SUCCESS) {
            /* remember the failure (only matters in test mode, see the end
             * of the function) and move on to the next bytecode */
            cli_warnmsg("Bytecode %u failed to run: %s\n", bc->id, cl_strerror(ret));
            errorflag = 1;
            continue;
        }
        if (ctx->virname) {
            cli_dbgmsg("Bytecode runhook found virus: %s\n", ctx->virname);
            cli_append_virus(cctx, ctx->virname);
            /* stop at the first detection unless all matches are wanted */
            if (!(cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES)) {
                cli_bytecode_context_clear(ctx);
                return CL_VIRUS;
            }
            cli_bytecode_context_reset(ctx);
            continue;
        }
        ret = cli_bytecode_context_getresult_int(ctx);
        /* TODO: use prefix here */
        cli_dbgmsg("Bytecode %u returned %u\n", bc->id, ret);
        if (ret == 0xcea5e) {
            cli_dbgmsg("Bytecode set BREAK flag in hook!\n");
            breakflag = 1;
        }
        if (!ret) {
            /* a result of 0 may come with an output file (logged below as
             * the "unpacked file"); if so, scan it like any other file */
            char *tempfile;
            int fd = cli_bytecode_context_getresult_file(ctx, &tempfile);
            if (fd && fd != -1) {
                if (cctx->engine->keeptmp)
                    cli_dbgmsg("Bytecode %u unpacked file saved in %s\n",
                               bc->id, tempfile);
                else
                    cli_dbgmsg("Bytecode %u unpacked file\n", bc->id);
                lseek(fd, 0, SEEK_SET);
                cli_dbgmsg("***** Scanning unpacked file ******\n");

                ret = cli_magic_scan_desc(fd, tempfile, cctx, NULL);

                /* unless temporary files are kept, empty the file before
                 * closing it and remove it afterwards */
                if (!cctx->engine->keeptmp)
                    if (ftruncate(fd, 0) == -1)
                        cli_dbgmsg("ftruncate failed on %d\n", fd);
                close(fd);
                if (!cctx->engine->keeptmp) {
                    /* NOTE(review): a failed unlink overwrites the scan
                     * result, including CL_VIRUS, and the CL_EUNLINK value
                     * is not returned below - confirm this is intended */
                    if (tempfile && cli_unlink(tempfile))
                        ret = CL_EUNLINK;
                }
                free(tempfile);
                if (ret != CL_CLEAN) {
                    /* only CL_VIRUS is acted upon; other scan errors are
                     * dropped and the loop continues */
                    if (ret == CL_VIRUS) {
                        cli_dbgmsg("Scanning unpacked file by bytecode %u found a virus\n", bc->id);
                        if (cctx->options->general & CL_SCAN_GENERAL_ALLMATCHES) {
                            cli_bytecode_context_reset(ctx);
                            continue;
                        }
                        cli_bytecode_context_clear(ctx);
                        return ret;
                    }
                }
                cli_bytecode_context_reset(ctx);
                continue;
            }
        }
        cli_bytecode_context_reset(ctx);
    }
    if (executed)
        cli_dbgmsg("Bytecode: executed %u bytecodes for this hook\n", executed);
    else
        cli_dbgmsg("Bytecode: no logical signature matched, no bytecode executed\n");
    /* run failures are fatal only in test mode */
    if (errorflag && cctx->engine->bytecode_mode == CL_BYTECODE_MODE_TEST)
        return CL_EBYTECODE_TESTFAIL;
    return breakflag ? CL_BREAK : CL_CLEAN;
}
2949 
cli_bytecode_context_setpe(struct cli_bc_ctx * ctx,const struct cli_pe_hook_data * data,const struct cli_exe_section * sections)2950 int cli_bytecode_context_setpe(struct cli_bc_ctx *ctx, const struct cli_pe_hook_data *data, const struct cli_exe_section *sections)
2951 {
2952     ctx->sections     = sections;
2953     ctx->hooks.pedata = data;
2954     return 0;
2955 }
2956 
cli_bytecode_context_setctx(struct cli_bc_ctx * ctx,void * cctx)2957 void cli_bytecode_context_setctx(struct cli_bc_ctx *ctx, void *cctx)
2958 {
2959     ctx->ctx              = cctx;
2960     ctx->bytecode_timeout = ((cli_ctx *)cctx)->engine->bytecode_timeout;
2961 }
2962 
cli_bytecode_describe(const struct cli_bc * bc)2963 void cli_bytecode_describe(const struct cli_bc *bc)
2964 {
2965     char buf[128];
2966     int cols;
2967     unsigned i;
2968     time_t stamp;
2969     int had;
2970 
2971     if (!bc) {
2972         printf("(null bytecode)\n");
2973         return;
2974     }
2975 
2976     stamp = bc->metadata.timestamp;
2977     printf("Bytecode format functionality level: %u\n", bc->metadata.formatlevel);
2978     printf("Bytecode metadata:\n\tcompiler version: %s\n",
2979            bc->metadata.compiler ? bc->metadata.compiler : "N/A");
2980     printf("\tcompiled on: (%d) %s",
2981            (uint32_t)stamp,
2982            cli_ctime(&stamp, buf, sizeof(buf)));
2983     printf("\tcompiled by: %s\n", bc->metadata.sigmaker ? bc->metadata.sigmaker : "N/A");
2984     /*TODO: parse and display arch name, also take it into account when
2985       JITing*/
2986     printf("\ttarget exclude: %d\n", bc->metadata.targetExclude);
2987     printf("\tbytecode type: ");
2988     switch (bc->kind) {
2989         case BC_GENERIC:
2990             puts("generic, not loadable by clamscan/clamd");
2991             break;
2992         case BC_STARTUP:
2993             puts("run on startup (unique)");
2994             break;
2995         case BC_LOGICAL:
2996             puts("logical only");
2997             break;
2998         case BC_PE_UNPACKER:
2999             puts("PE unpacker hook");
3000             break;
3001         case BC_PE_ALL:
3002             puts("all PE hook");
3003             break;
3004         case BC_PRECLASS:
3005             puts("preclass hook");
3006             break;
3007         case BC_ELF_UNPACKER:
3008             puts("ELF unpacker hook");
3009             break;
3010         case BC_MACHO_UNPACKER:
3011             puts("Mach-O unpacker hook");
3012             break;
3013         default:
3014             printf("Unknown (type %u)", bc->kind);
3015             break;
3016     }
3017     /* 0 means no limit */
3018     printf("\tbytecode functionality level: %u - %u\n",
3019            bc->metadata.minfunc, bc->metadata.maxfunc);
3020     printf("\tbytecode logical signature: %s\n",
3021            bc->lsig ? bc->lsig : "<none>");
3022     printf("\tvirusname prefix: %s\n",
3023            bc->vnameprefix);
3024     printf("\tvirusnames: %u\n", bc->vnames_cnt);
3025     printf("\tbytecode triggered on: ");
3026     switch (bc->kind) {
3027         case BC_GENERIC:
3028             puts("N/A (loaded in clambc only)");
3029             break;
3030         case BC_LOGICAL:
3031             puts("files matching logical signature");
3032             break;
3033         case BC_PE_UNPACKER:
3034             if (bc->lsig)
3035                 puts("PE files matching logical signature (unpacked)");
3036             else
3037                 puts("all PE files! (unpacked)");
3038             break;
3039         case BC_PDF:
3040             puts("PDF files");
3041             break;
3042         case BC_PE_ALL:
3043             if (bc->lsig)
3044                 puts("PE files matching logical signature");
3045             else
3046                 puts("all PE files!");
3047             break;
3048         case BC_PRECLASS:
3049             if (bc->lsig)
3050                 puts("PRECLASS files matching logical signature");
3051             else
3052                 puts("all PRECLASS files!");
3053             break;
3054         case BC_ELF_UNPACKER:
3055             if (bc->lsig)
3056                 puts("ELF files matching logical signature (unpacked)");
3057             else
3058                 puts("all ELF files! (unpacked)");
3059             break;
3060         case BC_MACHO_UNPACKER:
3061             if (bc->lsig)
3062                 puts("Mach-O files matching logical signature (unpacked)");
3063             else
3064                 puts("all Mach-O files! (unpacked)");
3065             break;
3066         default:
3067             puts("N/A (unknown type)\n");
3068             break;
3069     }
3070     printf("\tnumber of functions: %u\n\tnumber of types: %u\n",
3071            bc->num_func, bc->num_types);
3072     printf("\tnumber of global constants: %u\n", (unsigned)bc->num_globals);
3073     printf("\tnumber of debug nodes: %u\n", bc->dbgnode_cnt);
3074     printf("\tbytecode APIs used:");
3075     cols = 0; /* remaining */
3076     had  = 0;
3077     for (i = 0; i < cli_apicall_maxapi; i++) {
3078         if (cli_bitset_test(bc->uses_apis, i)) {
3079             unsigned len = strlen(cli_apicalls[i].name);
3080             if (had)
3081                 printf(",");
3082             if (len > (unsigned int)cols) {
3083                 printf("\n\t");
3084                 cols = 72;
3085             }
3086             printf(" %s", cli_apicalls[i].name);
3087             had = 1;
3088             cols -= len;
3089         }
3090     }
3091     printf("\n");
3092 }
3093 
/*
 * Printable names for the derived type kinds, used by cli_bytetype_describe()
 * which indexes this table with a type's `kind` field.  The entries carry the
 * same names as the kind constants handled in cli_bytetype_helper(), so the
 * order here has to follow the numeric order of those constants.
 */
const char *bc_tystr[] = {
    "DFunctionType",
    "DPointerType",
    "DStructType",
    "DPackedStructType",
    "DArrayType"};
3100 
/*
 * Printable opcode names, indexed directly by an instruction's opcode value
 * (see cli_byteinst_describe(), which rejects anything above OP_BC_INVALID
 * before indexing).  Slot 0 is a placeholder so that OP_BC_ADD sits at
 * index 1; the remaining entries must stay in the same order as the opcode
 * constants, ending with OP_BC_INVALID.
 */
const char *bc_opstr[] = {
    "OP_BC_NULL",
    "OP_BC_ADD", /* =1*/
    "OP_BC_SUB",
    "OP_BC_MUL",
    "OP_BC_UDIV",
    "OP_BC_SDIV",
    "OP_BC_UREM",
    "OP_BC_SREM",
    "OP_BC_SHL",
    "OP_BC_LSHR",
    "OP_BC_ASHR",
    "OP_BC_AND",
    "OP_BC_OR",
    "OP_BC_XOR",

    /* integer width conversions */
    "OP_BC_TRUNC",
    "OP_BC_SEXT",
    "OP_BC_ZEXT",

    /* terminators: each of these ends a basic block */
    "OP_BC_BRANCH",
    "OP_BC_JMP",
    "OP_BC_RET",
    "OP_BC_RET_VOID",

    /* comparisons and select */
    "OP_BC_ICMP_EQ",
    "OP_BC_ICMP_NE",
    "OP_BC_ICMP_UGT",
    "OP_BC_ICMP_UGE",
    "OP_BC_ICMP_ULT",
    "OP_BC_ICMP_ULE",
    "OP_BC_ICMP_SGT",
    "OP_BC_ICMP_SGE",
    "OP_BC_ICMP_SLE",
    "OP_BC_ICMP_SLT",
    "OP_BC_SELECT",
    /* calls, memory access and intrinsics */
    "OP_BC_CALL_DIRECT",
    "OP_BC_CALL_API",
    "OP_BC_COPY",
    "OP_BC_GEP1",
    "OP_BC_GEPZ",
    "OP_BC_GEPN",
    "OP_BC_STORE",
    "OP_BC_LOAD",
    "OP_BC_MEMSET",
    "OP_BC_MEMCPY",
    "OP_BC_MEMMOVE",
    "OP_BC_MEMCMP",
    "OP_BC_ISBIGENDIAN",
    "OP_BC_ABORT",
    "OP_BC_BSWAP16",
    "OP_BC_BSWAP32",
    "OP_BC_BSWAP64",
    "OP_BC_PTRDIFF32",
    "OP_BC_PTRTOINT64",
    "OP_BC_INVALID" /* last */
};
3158 
3159 extern unsigned cli_numapicalls;
cli_bytetype_helper(const struct cli_bc * bc,unsigned tid)3160 static void cli_bytetype_helper(const struct cli_bc *bc, unsigned tid)
3161 {
3162     unsigned i, j;
3163     const struct cli_bc_type *ty;
3164 
3165     if (tid & 0x8000) {
3166         printf("alloc ");
3167         tid &= 0x7fff;
3168     }
3169 
3170     if (tid < 65) {
3171         printf("i%d", tid);
3172         return;
3173     }
3174 
3175     i = tid - 65;
3176     if (i >= bc->num_types) {
3177         printf("invalid type");
3178         return;
3179     }
3180     ty = &bc->types[i];
3181 
3182     switch (ty->kind) {
3183         case DFunctionType:
3184             cli_bytetype_helper(bc, ty->containedTypes[0]);
3185             printf(" func ( ");
3186             for (j = 1; j < ty->numElements; ++j) {
3187                 cli_bytetype_helper(bc, ty->containedTypes[0]);
3188                 printf(" ");
3189             }
3190             printf(")");
3191             break;
3192         case DPointerType:
3193             cli_bytetype_helper(bc, ty->containedTypes[0]);
3194             printf("*");
3195             break;
3196         case DStructType:
3197         case DPackedStructType:
3198             printf("{ ");
3199             for (j = 0; j < ty->numElements; ++j) {
3200                 cli_bytetype_helper(bc, ty->containedTypes[0]);
3201                 printf(" ");
3202             }
3203             printf("}");
3204             break;
3205         case DArrayType:
3206             printf("[");
3207             printf("%d x ", ty->numElements);
3208             cli_bytetype_helper(bc, ty->containedTypes[0]);
3209             printf("]");
3210             break;
3211         default:
3212             printf("unhandled type kind %d, cannot parse", ty->kind);
3213             break;
3214     }
3215 }
3216 
cli_bytetype_describe(const struct cli_bc * bc)3217 void cli_bytetype_describe(const struct cli_bc *bc)
3218 {
3219     unsigned i, tid;
3220 
3221     printf("found %d extra types of %d total, starting at tid %d\n",
3222            bc->num_types, 64 + bc->num_types, bc->start_tid);
3223 
3224     printf("TID  KIND                INTERNAL\n");
3225     printf("------------------------------------------------------------------------\n");
3226     for (i = 0, tid = 65; i < bc->num_types - 1; ++i, ++tid) {
3227         printf("%3d: %-20s", tid, bc_tystr[bc->types[i].kind]);
3228         cli_bytetype_helper(bc, tid);
3229         printf("\n");
3230     }
3231     printf("------------------------------------------------------------------------\n");
3232 }
3233 
cli_bytevalue_describe(const struct cli_bc * bc,unsigned funcid)3234 void cli_bytevalue_describe(const struct cli_bc *bc, unsigned funcid)
3235 {
3236     unsigned i, total = 0;
3237     const struct cli_bc_func *func;
3238 
3239     if (funcid >= bc->num_func) {
3240         printf("bytecode diagnostic: funcid [%u] outside bytecode numfuncs [%u]\n",
3241                funcid, bc->num_func);
3242         return;
3243     }
3244     // globals
3245     printf("found a total of %zu globals\n", bc->num_globals);
3246     printf("GID  ID    VALUE\n");
3247     printf("------------------------------------------------------------------------\n");
3248     for (i = 0; i < bc->num_globals; ++i) {
3249         printf("%3u [%3u]: ", i, i);
3250         cli_bytetype_helper(bc, bc->globaltys[i]);
3251         printf(" unknown\n");
3252     }
3253     printf("------------------------------------------------------------------------\n");
3254 
3255     // arguments and local values
3256     func = &bc->funcs[funcid];
3257     printf("found %d values with %d arguments and %d locals\n",
3258            func->numValues, func->numArgs, func->numLocals);
3259     printf("VID  ID    VALUE\n");
3260     printf("------------------------------------------------------------------------\n");
3261     for (i = 0; i < func->numValues; ++i) {
3262         printf("%3u [%3u]: ", i, total++);
3263         cli_bytetype_helper(bc, func->types[i]);
3264         if (i < func->numArgs)
3265             printf(" argument");
3266         printf("\n");
3267     }
3268     printf("------------------------------------------------------------------------\n");
3269 
3270     // constants
3271     printf("found a total of %d constants\n", func->numConstants);
3272     printf("CID  ID    VALUE\n");
3273     printf("------------------------------------------------------------------------\n");
3274     for (i = 0; i < func->numConstants; ++i) {
3275         printf("%3u [%3u]: " STDu64 "(0x" STDx64 ")\n", i, total++, func->constants[i], func->constants[i]);
3276     }
3277     printf("------------------------------------------------------------------------\n");
3278     printf("found a total of %u total values\n", total);
3279     printf("------------------------------------------------------------------------\n");
3280     return;
3281 }
3282 
cli_byteinst_describe(const struct cli_bc_inst * inst,unsigned * bbnum)3283 void cli_byteinst_describe(const struct cli_bc_inst *inst, unsigned *bbnum)
3284 {
3285     unsigned j;
3286     char inst_str[256];
3287     const struct cli_apicall *api;
3288 
3289     if (inst->opcode > OP_BC_INVALID) {
3290         printf("opcode %u[%u] of type %u is not implemented yet!",
3291                inst->opcode, inst->interp_op / 5, inst->interp_op % 5);
3292         return;
3293     }
3294 
3295     snprintf(inst_str, sizeof(inst_str), "%-20s[%-3d/%3d/%3d]", bc_opstr[inst->opcode],
3296              inst->opcode, inst->interp_op, inst->interp_op % inst->opcode);
3297     printf("%-35s", inst_str);
3298     switch (inst->opcode) {
3299             // binary operations
3300         case OP_BC_ADD:
3301             printf("%d = %d + %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3302             break;
3303         case OP_BC_SUB:
3304             printf("%d = %d - %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3305             break;
3306         case OP_BC_MUL:
3307             printf("%d = %d * %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3308             break;
3309         case OP_BC_UDIV:
3310             printf("%d = %d / %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3311             break;
3312         case OP_BC_SDIV:
3313             printf("%d = %d / %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3314             break;
3315         case OP_BC_UREM:
3316             printf("%d = %d %% %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3317             break;
3318         case OP_BC_SREM:
3319             printf("%d = %d %% %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3320             break;
3321         case OP_BC_SHL:
3322             printf("%d = %d << %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3323             break;
3324         case OP_BC_LSHR:
3325             printf("%d = %d >> %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3326             break;
3327         case OP_BC_ASHR:
3328             printf("%d = %d >> %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3329             break;
3330         case OP_BC_AND:
3331             printf("%d = %d & %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3332             break;
3333         case OP_BC_OR:
3334             printf("%d = %d | %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3335             break;
3336         case OP_BC_XOR:
3337             printf("%d = %d ^ %d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3338             break;
3339 
3340             // casting operations
3341         case OP_BC_TRUNC:
3342             printf("%d = %d trunc " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3343             break;
3344         case OP_BC_SEXT:
3345             printf("%d = %d sext " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3346             break;
3347         case OP_BC_ZEXT:
3348             printf("%d = %d zext " STDx64, inst->dest, inst->u.cast.source, inst->u.cast.mask);
3349             break;
3350 
3351             // control operations (termination instructions)
3352         case OP_BC_BRANCH:
3353             printf("br %d ? bb.%d : bb.%d", inst->u.branch.condition,
3354                    inst->u.branch.br_true, inst->u.branch.br_false);
3355             (*bbnum)++;
3356             break;
3357         case OP_BC_JMP:
3358             printf("jmp bb.%d", inst->u.jump);
3359             (*bbnum)++;
3360             break;
3361         case OP_BC_RET:
3362             printf("ret %d", inst->u.unaryop);
3363             (*bbnum)++;
3364             break;
3365         case OP_BC_RET_VOID:
3366             printf("ret void");
3367             (*bbnum)++;
3368             break;
3369 
3370             // comparison operations
3371         case OP_BC_ICMP_EQ:
3372             printf("%d = (%d == %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3373             break;
3374         case OP_BC_ICMP_NE:
3375             printf("%d = (%d != %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3376             break;
3377         case OP_BC_ICMP_UGT:
3378             printf("%d = (%d > %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3379             break;
3380         case OP_BC_ICMP_UGE:
3381             printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3382             break;
3383         case OP_BC_ICMP_ULT:
3384             printf("%d = (%d < %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3385             break;
3386         case OP_BC_ICMP_ULE:
3387             printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3388             break;
3389         case OP_BC_ICMP_SGT:
3390             printf("%d = (%d > %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3391             break;
3392         case OP_BC_ICMP_SGE:
3393             printf("%d = (%d >= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3394             break;
3395         case OP_BC_ICMP_SLE:
3396             printf("%d = (%d <= %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3397             break;
3398         case OP_BC_ICMP_SLT:
3399             printf("%d = (%d < %d)", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3400             break;
3401         case OP_BC_SELECT:
3402             printf("%d = %d ? %d : %d)", inst->dest, inst->u.three[0],
3403                    inst->u.three[1], inst->u.three[2]);
3404             break;
3405 
3406             // function calling
3407         case OP_BC_CALL_DIRECT:
3408             printf("%d = call F.%d (", inst->dest, inst->u.ops.funcid);
3409             for (j = 0; j < inst->u.ops.numOps; ++j) {
3410                 if (j == inst->u.ops.numOps - 1) {
3411                     printf("%d", inst->u.ops.ops[j]);
3412                 } else {
3413                     printf("%d, ", inst->u.ops.ops[j]);
3414                 }
3415             }
3416             printf(")");
3417             break;
3418         case OP_BC_CALL_API: {
3419             if (inst->u.ops.funcid > cli_numapicalls) {
3420                 printf("apicall FID %d not yet implemented!\n", inst->u.ops.funcid);
3421                 break;
3422             }
3423             api = &cli_apicalls[inst->u.ops.funcid];
3424             switch (api->kind) {
3425                 case 0:
3426                     printf("%d = %s[%d] (%d, %d)", inst->dest, api->name,
3427                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3428                     break;
3429                 case 1:
3430                     printf("%d = %s[%d] (p.%d, %d)", inst->dest, api->name,
3431                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3432                     break;
3433                 case 2:
3434                     printf("%d = %s[%d] (%d)", inst->dest, api->name,
3435                            inst->u.ops.funcid, inst->u.ops.ops[0]);
3436                     break;
3437                 case 3:
3438                     printf("p.%d = %s[%d] (%d)", inst->dest, api->name,
3439                            inst->u.ops.funcid, inst->u.ops.ops[0]);
3440                     break;
3441                 case 4:
3442                     printf("%d = %s[%d] (p.%d, %d, %d, %d, %d)", inst->dest, api->name,
3443                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3444                            inst->u.ops.ops[2], inst->u.ops.ops[3], inst->u.ops.ops[4]);
3445                     break;
3446                 case 5:
3447                     printf("%d = %s[%d] ()", inst->dest, api->name,
3448                            inst->u.ops.funcid);
3449                     break;
3450                 case 6:
3451                     printf("p.%d = %s[%d] (%d, %d)", inst->dest, api->name,
3452                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1]);
3453                     break;
3454                 case 7:
3455                     printf("%d = %s[%d] (%d, %d, %d)", inst->dest, api->name,
3456                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3457                            inst->u.ops.ops[2]);
3458                     break;
3459                 case 8:
3460                     printf("%d = %s[%d] (p.%d, %d, p.%d, %d)", inst->dest, api->name,
3461                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3462                            inst->u.ops.ops[2], inst->u.ops.ops[3]);
3463                     break;
3464                 case 9:
3465                     printf("%d = %s[%d] (p.%d, %d, %d)", inst->dest, api->name,
3466                            inst->u.ops.funcid, inst->u.ops.ops[0], inst->u.ops.ops[1],
3467                            inst->u.ops.ops[2]);
3468                     break;
3469                 default:
3470                     printf("type %u apicalls not yet implemented!\n", api->kind);
3471                     break;
3472             }
3473         } break;
3474 
3475             // memory operations
3476         case OP_BC_COPY:
3477             printf("cp %d -> %d", inst->u.binop[0], inst->u.binop[1]);
3478             break;
3479         case OP_BC_GEP1:
3480             printf("%d = gep1 p.%d + (%d * %d)", inst->dest, inst->u.three[1],
3481                    inst->u.three[2], inst->u.three[0]);
3482             break;
3483         case OP_BC_GEPZ:
3484             printf("%d = gepz p.%d + (%d)", inst->dest,
3485                    inst->u.three[1], inst->u.three[2]);
3486             break;
3487         case OP_BC_GEPN:
3488             printf("illegal opcode, impossible");
3489             break;
3490         case OP_BC_STORE:
3491             printf("store %d -> p.%d", inst->u.binop[0], inst->u.binop[1]);
3492             break;
3493         case OP_BC_LOAD:
3494             printf("load  %d <- p.%d", inst->dest, inst->u.unaryop);
3495             break;
3496 
3497             // llvm intrinsics
3498         case OP_BC_MEMSET:
3499             printf("%d = memset (p.%d, %d, %d)", inst->dest, inst->u.three[0],
3500                    inst->u.three[1], inst->u.three[2]);
3501             break;
3502         case OP_BC_MEMCPY:
3503             printf("%d = memcpy (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3504                    inst->u.three[1], inst->u.three[2]);
3505             break;
3506         case OP_BC_MEMMOVE:
3507             printf("%d = memmove (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3508                    inst->u.three[1], inst->u.three[2]);
3509             break;
3510         case OP_BC_MEMCMP:
3511             printf("%d = memcmp (p.%d, p.%d, %d)", inst->dest, inst->u.three[0],
3512                    inst->u.three[1], inst->u.three[2]);
3513             break;
3514 
3515             // utility operations
3516         case OP_BC_ISBIGENDIAN:
3517             printf("%d = isbigendian()", inst->dest);
3518             break;
3519         case OP_BC_ABORT:
3520             printf("ABORT!!");
3521             break;
3522         case OP_BC_BSWAP16:
3523             printf("%d = bswap16 %d", inst->dest, inst->u.unaryop);
3524             break;
3525         case OP_BC_BSWAP32:
3526             printf("%d = bswap32 %d", inst->dest, inst->u.unaryop);
3527             break;
3528         case OP_BC_BSWAP64:
3529             printf("%d = bswap64 %d", inst->dest, inst->u.unaryop);
3530             break;
3531         case OP_BC_PTRDIFF32:
3532             printf("%d = ptrdiff32 p.%d p.%d", inst->dest, inst->u.binop[0], inst->u.binop[1]);
3533             break;
3534         case OP_BC_PTRTOINT64:
3535             printf("%d = ptrtoint64 p.%d", inst->dest, inst->u.unaryop);
3536             break;
3537         case OP_BC_INVALID: /* last */
3538             printf("INVALID!!");
3539             break;
3540 
3541         default:
3542             // redundant check
3543             printf("opcode %u[%u] of type %u is not implemented yet!",
3544                    inst->opcode, inst->interp_op / 5, inst->interp_op % 5);
3545             break;
3546     }
3547 }
3548 
cli_bytefunc_describe(const struct cli_bc * bc,unsigned funcid)3549 void cli_bytefunc_describe(const struct cli_bc *bc, unsigned funcid)
3550 {
3551     unsigned i, bbnum, bbpre;
3552     const struct cli_bc_func *func;
3553 
3554     if (funcid >= bc->num_func) {
3555         printf("bytecode diagnostic: funcid [%u] outside bytecode numfuncs [%u]\n",
3556                funcid, bc->num_func);
3557         return;
3558     }
3559 
3560     func = &bc->funcs[funcid];
3561 
3562     printf("FUNCTION ID: F.%d -> NUMINSTS %d\n", funcid, func->numInsts);
3563     printf("BB   IDX  OPCODE              [ID /IID/MOD]  INST\n");
3564     printf("------------------------------------------------------------------------\n");
3565     bbpre = 0;
3566     bbnum = 0;
3567     for (i = 0; i < func->numInsts; ++i) {
3568         if (bbpre != bbnum) {
3569             printf("\n");
3570             bbpre = bbnum;
3571         }
3572 
3573         printf("%3d  %3d  ", bbnum, i);
3574         cli_byteinst_describe(&func->allinsts[i], &bbnum);
3575         printf("\n");
3576     }
3577     printf("------------------------------------------------------------------------\n");
3578 }
3579