1 /*
2 Copyright (c) 2013-2018. The YARA Authors. All Rights Reserved.
3 
4 Redistribution and use in source and binary forms, with or without modification,
5 are permitted provided that the following conditions are met:
6 
7 1. Redistributions of source code must retain the above copyright notice, this
8 list of conditions and the following disclaimer.
9 
10 2. Redistributions in binary form must reproduce the above copyright notice,
11 this list of conditions and the following disclaimer in the documentation and/or
12 other materials provided with the distribution.
13 
14 3. Neither the name of the copyright holder nor the names of its contributors
15 may be used to endorse or promote products derived from this software without
16 specific prior written permission.
17 
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 #include <assert.h>
31 #include <fcntl.h>
32 #include <stddef.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <sys/stat.h>
36 
37 #ifdef _MSC_VER
38 #include <io.h>
39 #include <share.h>
40 #else
41 #include <unistd.h>
42 #endif
43 
44 #include <yara/compiler.h>
45 #include <yara/error.h>
46 #include <yara/exec.h>
47 #include <yara/lexer.h>
48 #include <yara/libyara.h>
49 #include <yara/mem.h>
50 #include <yara/object.h>
51 #include <yara/strutils.h>
52 #include <yara/utils.h>
53 
////////////////////////////////////////////////////////////////////////////////
// Default function for releasing the buffers returned by the default include
// callback, which allocates them with yr_malloc.
//
// "callback_result_ptr" is the buffer to release, nothing is done if it is
// NULL. "user_data" is not used.
//
static void _yr_compiler_default_include_free(
    const char* callback_result_ptr,
    void* user_data)
{
  if (callback_result_ptr == NULL)
    return;

  // The const qualifier is dropped only for handing the buffer to yr_free.
  yr_free((void*) callback_result_ptr);
}
63 
////////////////////////////////////////////////////////////////////////////////
// Default include callback. Opens the file named by "include_name" and returns
// its whole content in a newly allocated, null-terminated buffer.
//
// The buffer is allocated with yr_malloc, so it must be released with yr_free
// (this is what _yr_compiler_default_include_free does). NULL is returned if
// the file can't be opened, if it is not a regular file (non-MSVC builds), if
// its size can't be determined, if memory can't be allocated or if the content
// can't be read completely.
//
// "calling_rule_filename", "calling_rule_namespace" and "user_data" are not
// used by this implementation.
//
const char* _yr_compiler_default_include_callback(
    const char* include_name,
    const char* calling_rule_filename,
    const char* calling_rule_namespace,
    void* user_data)
{
#ifdef _MSC_VER
  long file_size;
  int bytes_read;
#else
  struct stat stbuf;
  off_t file_size;
  ssize_t bytes_read;
#endif

  char* file_buffer = NULL;
  size_t bytes_total;
  size_t bytes_done = 0;

  int fd = -1;

#if defined(_MSC_VER)
  // The return value is not checked, fd remains -1 if the file is not opened.
  _sopen_s(&fd, include_name, _O_RDONLY | _O_BINARY, _SH_DENYWR, _S_IREAD);
#elif defined(_WIN32) || defined(__CYGWIN__)
  fd = open(include_name, O_RDONLY | O_BINARY);
#else
  fd = open(include_name, O_RDONLY);
#endif

  if (fd == -1)
    return NULL;

#ifdef _MSC_VER
  file_size = _filelength(fd);
#else
  if ((fstat(fd, &stbuf) != 0) || (!S_ISREG(stbuf.st_mode)))
    goto cleanup;

  file_size = stbuf.st_size;
#endif

  // Covers the -1 returned by _filelength on error, and prevents a negative
  // size from being converted into a huge allocation request.
  if (file_size < 0)
    goto cleanup;

  bytes_total = (size_t) file_size;

  // One extra byte for the null terminator.
  file_buffer = (char*) yr_malloc(bytes_total + 1);

  if (file_buffer == NULL)
    goto cleanup;

  // read() is allowed to return less data than requested, so keep reading
  // until the whole file is in the buffer. An error or a premature end of
  // file (e.g: the file was truncated after its size was obtained) makes the
  // function fail.
  while (bytes_done < bytes_total)
  {
#ifdef _MSC_VER
    bytes_read = read(
        fd, file_buffer + bytes_done, (unsigned int) (bytes_total - bytes_done));
#else
    bytes_read = read(fd, file_buffer + bytes_done, bytes_total - bytes_done);
#endif

    if (bytes_read <= 0)
    {
      yr_free(file_buffer);
      file_buffer = NULL;
      goto cleanup;
    }

    bytes_done += (size_t) bytes_read;
  }

  file_buffer[bytes_total] = '\0';

cleanup:

  // Single exit point for every path where the file was successfully opened.
#ifdef _MSC_VER
  _close(fd);
#else
  close(fd);
#endif

  return file_buffer;
}
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 // Returns a rule given its index in the rules table.
152 //
153 // The returned pointer is valid as long as no other rule is written to the
154 // table. This is because the write operation may cause the table to be moved to
155 // a different location in memory. Use the pointer only in a limited scope where
156 // you can be sure that no other rule is being written during the pointer's
157 // lifetime.
158 //
_yr_compiler_get_rule_by_idx(YR_COMPILER * compiler,uint32_t rule_idx)159 YR_RULE* _yr_compiler_get_rule_by_idx(YR_COMPILER* compiler, uint32_t rule_idx)
160 {
161   return (YR_RULE*) yr_arena_get_ptr(
162       compiler->arena, YR_RULES_TABLE, rule_idx * sizeof(YR_RULE));
163 }
164 
165 ////////////////////////////////////////////////////////////////////////////////
166 // Stores some data in the YR_SZ_POOL and returns a reference to it.
167 //
168 // If the same data was already stored in a previous call to this function the
169 // data is not written again, a reference to the existing data is returned
170 // instead.
171 //
_yr_compiler_store_data(YR_COMPILER * compiler,const void * data,size_t data_length,YR_ARENA_REF * ref)172 int _yr_compiler_store_data(
173     YR_COMPILER* compiler,
174     const void* data,
175     size_t data_length,
176     YR_ARENA_REF* ref)
177 {
178   // Check if the data is already in YR_SZ_POOL by using a hash table.
179   uint32_t offset = yr_hash_table_lookup_uint32_raw_key(
180       compiler->sz_table, data, data_length, NULL);
181 
182   if (offset == UINT32_MAX)
183   {
184     // The data was not previously written to YR_SZ_POOL, write it and store
185     // the reference's offset in the hash table. Storing the buffer number
186     // is not necessary, it's always YR_SZ_POOL.
187     FAIL_ON_ERROR(yr_arena_write_data(
188         compiler->arena, YR_SZ_POOL, data, data_length, ref));
189 
190     FAIL_ON_ERROR(yr_hash_table_add_uint32_raw_key(
191         compiler->sz_table, data, data_length, NULL, ref->offset));
192   }
193   else
194   {
195     ref->buffer_id = YR_SZ_POOL;
196     ref->offset = offset;
197   }
198 
199   return ERROR_SUCCESS;
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 // Similar to _yr_compiler_store_data, but receives a null-terminated string.
204 //
_yr_compiler_store_string(YR_COMPILER * compiler,const char * string,YR_ARENA_REF * ref)205 int _yr_compiler_store_string(
206     YR_COMPILER* compiler,
207     const char* string,
208     YR_ARENA_REF* ref)
209 {
210   return _yr_compiler_store_data(
211       compiler,
212       (void*) string,
213       strlen(string) + 1,  // include the null terminator
214       ref);
215 }
216 
yr_compiler_create(YR_COMPILER ** compiler)217 YR_API int yr_compiler_create(YR_COMPILER** compiler)
218 {
219   int result;
220   YR_COMPILER* new_compiler;
221 
222   new_compiler = (YR_COMPILER*) yr_calloc(1, sizeof(YR_COMPILER));
223 
224   if (new_compiler == NULL)
225     return ERROR_INSUFFICIENT_MEMORY;
226 
227   new_compiler->current_rule_idx = UINT32_MAX;
228   new_compiler->next_rule_idx = 0;
229   new_compiler->current_string_idx = 0;
230   new_compiler->current_namespace_idx = 0;
231   new_compiler->current_meta_idx = 0;
232   new_compiler->num_namespaces = 0;
233   new_compiler->errors = 0;
234   new_compiler->callback = NULL;
235   new_compiler->rules = NULL;
236   new_compiler->include_callback = _yr_compiler_default_include_callback;
237   new_compiler->incl_clbk_user_data = NULL;
238   new_compiler->include_free = _yr_compiler_default_include_free;
239   new_compiler->re_ast_callback = NULL;
240   new_compiler->re_ast_clbk_user_data = NULL;
241   new_compiler->last_error = ERROR_SUCCESS;
242   new_compiler->last_error_line = 0;
243   new_compiler->current_line = 0;
244   new_compiler->file_name_stack_ptr = 0;
245   new_compiler->fixup_stack_head = NULL;
246   new_compiler->loop_index = -1;
247   new_compiler->loop_for_of_var_index = -1;
248 
249   new_compiler->atoms_config.get_atom_quality = yr_atoms_heuristic_quality;
250   new_compiler->atoms_config.quality_warning_threshold =
251       YR_ATOM_QUALITY_WARNING_THRESHOLD;
252 
253   result = yr_hash_table_create(5000, &new_compiler->rules_table);
254 
255   if (result == ERROR_SUCCESS)
256     result = yr_hash_table_create(1000, &new_compiler->objects_table);
257 
258   if (result == ERROR_SUCCESS)
259     result = yr_hash_table_create(10000, &new_compiler->strings_table);
260 
261   if (result == ERROR_SUCCESS)
262     result = yr_hash_table_create(10000, &new_compiler->sz_table);
263 
264   if (result == ERROR_SUCCESS)
265     result = yr_arena_create(YR_NUM_SECTIONS, 1048576, &new_compiler->arena);
266 
267   if (result == ERROR_SUCCESS)
268     result = yr_ac_automaton_create(
269         new_compiler->arena, &new_compiler->automaton);
270 
271   if (result == ERROR_SUCCESS)
272   {
273     *compiler = new_compiler;
274   }
275   else  // if error, do cleanup
276   {
277     yr_compiler_destroy(new_compiler);
278   }
279 
280   return result;
281 }
282 
yr_compiler_destroy(YR_COMPILER * compiler)283 YR_API void yr_compiler_destroy(YR_COMPILER* compiler)
284 {
285   yr_arena_release(compiler->arena);
286 
287   if (compiler->automaton != NULL)
288     yr_ac_automaton_destroy(compiler->automaton);
289 
290   yr_hash_table_destroy(compiler->rules_table, NULL);
291 
292   yr_hash_table_destroy(compiler->strings_table, NULL);
293 
294   yr_hash_table_destroy(compiler->sz_table, NULL);
295 
296   yr_hash_table_destroy(
297       compiler->objects_table,
298       (YR_HASH_TABLE_FREE_VALUE_FUNC) yr_object_destroy);
299 
300   if (compiler->atoms_config.free_quality_table)
301     yr_free(compiler->atoms_config.quality_table);
302 
303   for (int i = 0; i < compiler->file_name_stack_ptr; i++)
304     yr_free(compiler->file_name_stack[i]);
305 
306   YR_FIXUP* fixup = compiler->fixup_stack_head;
307 
308   while (fixup != NULL)
309   {
310     YR_FIXUP* next_fixup = fixup->next;
311     yr_free(fixup);
312     fixup = next_fixup;
313   }
314 
315   yr_free(compiler);
316 }
317 
yr_compiler_set_callback(YR_COMPILER * compiler,YR_COMPILER_CALLBACK_FUNC callback,void * user_data)318 YR_API void yr_compiler_set_callback(
319     YR_COMPILER* compiler,
320     YR_COMPILER_CALLBACK_FUNC callback,
321     void* user_data)
322 {
323   compiler->callback = callback;
324   compiler->user_data = user_data;
325 }
326 
yr_compiler_set_include_callback(YR_COMPILER * compiler,YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback,YR_COMPILER_INCLUDE_FREE_FUNC include_free,void * user_data)327 YR_API void yr_compiler_set_include_callback(
328     YR_COMPILER* compiler,
329     YR_COMPILER_INCLUDE_CALLBACK_FUNC include_callback,
330     YR_COMPILER_INCLUDE_FREE_FUNC include_free,
331     void* user_data)
332 {
333   compiler->include_callback = include_callback;
334   compiler->include_free = include_free;
335   compiler->incl_clbk_user_data = user_data;
336 }
337 
yr_compiler_set_re_ast_callback(YR_COMPILER * compiler,YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback,void * user_data)338 YR_API void yr_compiler_set_re_ast_callback(
339     YR_COMPILER* compiler,
340     YR_COMPILER_RE_AST_CALLBACK_FUNC re_ast_callback,
341     void* user_data)
342 {
343   compiler->re_ast_callback = re_ast_callback;
344   compiler->re_ast_clbk_user_data = user_data;
345 }
346 
347 ////////////////////////////////////////////////////////////////////////////////
348 // This function allows to specify an atom quality table to be used by the
349 // compiler for choosing the best atoms from regular expressions and strings.
350 // When a quality table is set, the compiler uses yr_atoms_table_quality
351 // instead of yr_atoms_heuristic_quality for computing atom quality. The table
352 // has an arbitrary number of entries, each composed of YR_MAX_ATOM_LENGTH + 1
353 // bytes. The first YR_MAX_ATOM_LENGTH bytes from each entry are the atom's
354 // ones, and the remaining byte is a value in the range 0-255 determining the
355 // atom's quality. Entries must be lexicographically sorted by atom in ascending
356 // order.
357 //
358 //  [ atom (YR_MAX_ATOM_LENGTH bytes) ] [ quality (1 byte) ]
359 //
360 //  [ 00 00 .. 00 00 ] [ 00 ]
361 //  [ 00 00 .. 00 01 ] [ 45 ]
362 //  [ 00 00 .. 00 02 ] [ 13 ]
363 //  ...
364 //  [ FF FF .. FF FF ] [ 03 ]
365 //
366 // The "table" argument must point to a buffer containing the quality in
367 // the format explained above, and "entries" must contain the number of entries
368 // in the table. The table can not be freed while the compiler is in use, the
369 // caller is responsible for freeing the table.
370 //
371 // The "warning_threshold" argument must be a number between 0 and 255, if some
372 // atom chosen for a string have a quality below the specified threshold a
373 // warning like "<string> is slowing down scanning" is shown.
374 //
yr_compiler_set_atom_quality_table(YR_COMPILER * compiler,const void * table,int entries,unsigned char warning_threshold)375 YR_API void yr_compiler_set_atom_quality_table(
376     YR_COMPILER* compiler,
377     const void* table,
378     int entries,
379     unsigned char warning_threshold)
380 {
381   compiler->atoms_config.free_quality_table = false;
382   compiler->atoms_config.quality_warning_threshold = warning_threshold;
383   compiler->atoms_config.get_atom_quality = yr_atoms_table_quality;
384   compiler->atoms_config.quality_table_entries = entries;
385   compiler->atoms_config.quality_table = (YR_ATOM_QUALITY_TABLE_ENTRY*) table;
386 }
387 
388 ////////////////////////////////////////////////////////////////////////////////
389 // Load an atom quality table from a file. The file's content must have the
390 // format explained in the description for yr_compiler_set_atom_quality_table.
391 //
yr_compiler_load_atom_quality_table(YR_COMPILER * compiler,const char * filename,unsigned char warning_threshold)392 YR_API int yr_compiler_load_atom_quality_table(
393     YR_COMPILER* compiler,
394     const char* filename,
395     unsigned char warning_threshold)
396 {
397   long file_size;
398   int entries;
399   void* table;
400 
401   FILE* fh = fopen(filename, "rb");
402 
403   if (fh == NULL)
404     return ERROR_COULD_NOT_OPEN_FILE;
405 
406   fseek(fh, 0L, SEEK_END);
407   file_size = ftell(fh);
408   fseek(fh, 0L, SEEK_SET);
409 
410   if (file_size == -1L)
411   {
412     fclose(fh);
413     return ERROR_COULD_NOT_READ_FILE;
414   }
415 
416   table = yr_malloc(file_size);
417 
418   if (table == NULL)
419   {
420     fclose(fh);
421     return ERROR_INSUFFICIENT_MEMORY;
422   }
423 
424   entries = (int) file_size / sizeof(YR_ATOM_QUALITY_TABLE_ENTRY);
425 
426   if (fread(table, sizeof(YR_ATOM_QUALITY_TABLE_ENTRY), entries, fh) != entries)
427   {
428     fclose(fh);
429     yr_free(table);
430     return ERROR_COULD_NOT_READ_FILE;
431   }
432 
433   fclose(fh);
434 
435   yr_compiler_set_atom_quality_table(
436       compiler, table, entries, warning_threshold);
437 
438   compiler->atoms_config.free_quality_table = true;
439 
440   return ERROR_SUCCESS;
441 }
442 
////////////////////////////////////////////////////////////////////////////////
// Pushes a copy of "file_name" onto the compiler's file name stack.
//
// Returns:
//   ERROR_SUCCESS                      The name was pushed.
//   ERROR_INCLUDES_CIRCULAR_REFERENCE  The name is already in the stack.
//   ERROR_INCLUDE_DEPTH_EXCEEDED       The stack already has
//                                      YR_MAX_INCLUDE_DEPTH names.
//   ERROR_INSUFFICIENT_MEMORY          The name couldn't be duplicated.
//
int _yr_compiler_push_file_name(YR_COMPILER* compiler, const char* file_name)
{
  const int depth = compiler->file_name_stack_ptr;

  // A file that is already in the stack is being included again by itself or
  // by one of the files it includes.
  for (int level = 0; level < depth; level++)
  {
    if (strcmp(file_name, compiler->file_name_stack[level]) == 0)
      return ERROR_INCLUDES_CIRCULAR_REFERENCE;
  }

  if (depth == YR_MAX_INCLUDE_DEPTH)
    return ERROR_INCLUDE_DEPTH_EXCEEDED;

  // The stack keeps its own copy of the name, it's freed when popped.
  char* name_copy = yr_strdup(file_name);

  if (name_copy == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  compiler->file_name_stack[depth] = name_copy;
  compiler->file_name_stack_ptr = depth + 1;

  return ERROR_SUCCESS;
}
467 
////////////////////////////////////////////////////////////////////////////////
// Removes the file name at the top of the compiler's file name stack, freeing
// it and setting its slot to NULL. Does nothing if the stack is empty.
//
void _yr_compiler_pop_file_name(YR_COMPILER* compiler)
{
  if (compiler->file_name_stack_ptr <= 0)
    return;

  // After the decrement the stack pointer is the index of the popped name.
  const int top = --compiler->file_name_stack_ptr;

  yr_free(compiler->file_name_stack[top]);
  compiler->file_name_stack[top] = NULL;
}
477 
////////////////////////////////////////////////////////////////////////////////
// Returns the total number of variables (vars_count + vars_internal_count)
// used by the loops with indexes lower than the compiler's loop_index. The
// result is 0 when loop_index is 0 or negative.
//
// NOTE(review): presumably this is the index of the first variable slot
// available for the current loop, confirm against the callers.
//
int _yr_compiler_get_var_frame(YR_COMPILER* compiler)
{
  int frame = 0;
  int depth = 0;

  while (depth < compiler->loop_index)
  {
    frame += compiler->loop[depth].vars_count +
             compiler->loop[depth].vars_internal_count;

    depth++;
  }

  return frame;
}
490 
yr_compiler_get_current_file_name(YR_COMPILER * compiler)491 YR_API char* yr_compiler_get_current_file_name(YR_COMPILER* compiler)
492 {
493   if (compiler->file_name_stack_ptr > 0)
494   {
495     return compiler->file_name_stack[compiler->file_name_stack_ptr - 1];
496   }
497   else
498   {
499     return NULL;
500   }
501 }
502 
////////////////////////////////////////////////////////////////////////////////
// Makes "namespace_" the compiler's current namespace.
//
// If a namespace with that name already exists in YR_NAMESPACES_TABLE it is
// reused, if not, a new one is appended to the table. In both cases the
// compiler's current_namespace_idx ends up being the namespace's index.
//
// Returns ERROR_SUCCESS, or the error returned while allocating the namespace
// or storing its name.
//
static int _yr_compiler_set_namespace(
    YR_COMPILER* compiler,
    const char* namespace_)
{
  // Pointer to the first namespace in the table. It is not dereferenced when
  // the compiler doesn't have any namespace yet.
  YR_NAMESPACE* namespaces = (YR_NAMESPACE*) yr_arena_get_ptr(
      compiler->arena, YR_NAMESPACES_TABLE, 0);

  for (int idx = 0; idx < compiler->num_namespaces; idx++)
  {
    if (strcmp(namespaces[idx].name, namespace_) == 0)
    {
      // Namespace found, nothing else to do.
      compiler->current_namespace_idx = idx;
      return ERROR_SUCCESS;
    }
  }

  YR_ARENA_REF ns_ref;
  YR_ARENA_REF name_ref;

  // The namespace doesn't exist, append a new one to the table. The offset of
  // the "name" field is passed along with the allocation request.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->arena,
      YR_NAMESPACES_TABLE,
      sizeof(YR_NAMESPACE),
      &ns_ref,
      offsetof(YR_NAMESPACE, name),
      EOL));

  YR_NAMESPACE* new_ns = (YR_NAMESPACE*) yr_arena_ref_to_ptr(
      compiler->arena, &ns_ref);

  FAIL_ON_ERROR(_yr_compiler_store_string(compiler, namespace_, &name_ref));

  new_ns->name = (const char*) yr_arena_ref_to_ptr(compiler->arena, &name_ref);
  new_ns->idx = compiler->num_namespaces;

  compiler->current_namespace_idx = compiler->num_namespaces;
  compiler->num_namespaces++;

  return ERROR_SUCCESS;
}
547 
yr_compiler_add_file(YR_COMPILER * compiler,FILE * rules_file,const char * namespace_,const char * file_name)548 YR_API int yr_compiler_add_file(
549     YR_COMPILER* compiler,
550     FILE* rules_file,
551     const char* namespace_,
552     const char* file_name)
553 {
554   int result;
555 
556   // Don't allow yr_compiler_add_file() after
557   // yr_compiler_get_rules() has been called.
558 
559   assert(compiler->rules == NULL);
560 
561   // Don't allow calls to yr_compiler_add_file() if a previous call to
562   // yr_compiler_add_XXXX failed.
563 
564   assert(compiler->errors == 0);
565 
566   if (namespace_ != NULL)
567     compiler->last_error = _yr_compiler_set_namespace(compiler, namespace_);
568   else
569     compiler->last_error = _yr_compiler_set_namespace(compiler, "default");
570 
571   if (compiler->last_error == ERROR_SUCCESS && file_name != NULL)
572     compiler->last_error = _yr_compiler_push_file_name(compiler, file_name);
573 
574   if (compiler->last_error != ERROR_SUCCESS)
575     return ++compiler->errors;
576 
577   result = yr_lex_parse_rules_file(rules_file, compiler);
578 
579   if (file_name != NULL)
580     _yr_compiler_pop_file_name(compiler);
581 
582   return result;
583 }
584 
yr_compiler_add_fd(YR_COMPILER * compiler,YR_FILE_DESCRIPTOR rules_fd,const char * namespace_,const char * file_name)585 YR_API int yr_compiler_add_fd(
586     YR_COMPILER* compiler,
587     YR_FILE_DESCRIPTOR rules_fd,
588     const char* namespace_,
589     const char* file_name)
590 {
591   int result;
592 
593   // Don't allow yr_compiler_add_fd() after
594   // yr_compiler_get_rules() has been called.
595   assert(compiler->rules == NULL);
596 
597   // Don't allow calls to yr_compiler_add_fd() if a previous call to
598   // yr_compiler_add_XXXX failed.
599   assert(compiler->errors == 0);
600 
601   if (namespace_ != NULL)
602     compiler->last_error = _yr_compiler_set_namespace(compiler, namespace_);
603   else
604     compiler->last_error = _yr_compiler_set_namespace(compiler, "default");
605 
606   if (compiler->last_error == ERROR_SUCCESS && file_name != NULL)
607     compiler->last_error = _yr_compiler_push_file_name(compiler, file_name);
608 
609   if (compiler->last_error != ERROR_SUCCESS)
610     return ++compiler->errors;
611 
612   result = yr_lex_parse_rules_fd(rules_fd, compiler);
613 
614   if (file_name != NULL)
615     _yr_compiler_pop_file_name(compiler);
616 
617   return result;
618 }
619 
yr_compiler_add_string(YR_COMPILER * compiler,const char * rules_string,const char * namespace_)620 YR_API int yr_compiler_add_string(
621     YR_COMPILER* compiler,
622     const char* rules_string,
623     const char* namespace_)
624 {
625   // Don't allow calls to yr_compiler_add_string() after
626   // yr_compiler_get_rules() has been called.
627   assert(compiler->rules == NULL);
628 
629   // Don't allow calls to yr_compiler_add_string() if a previous call to
630   // yr_compiler_add_XXXX failed.
631   assert(compiler->errors == 0);
632 
633   if (namespace_ != NULL)
634     compiler->last_error = _yr_compiler_set_namespace(compiler, namespace_);
635   else
636     compiler->last_error = _yr_compiler_set_namespace(compiler, "default");
637 
638   if (compiler->last_error != ERROR_SUCCESS)
639     return ++compiler->errors;
640 
641   return yr_lex_parse_rules_string(rules_string, compiler);
642 }
643 
////////////////////////////////////////////////////////////////////////////////
// Finishes the compilation and creates the YR_RULES object, which is stored
// in compiler->rules.
//
// The steps are: terminating the code with a halt instruction, terminating
// the rules table and the external variables table with null entries, writing
// the Aho-Corasick automaton to the arena, writing the summary, and finally
// building the rules from the arena.
//
// Returns ERROR_SUCCESS, or the error returned by the step that failed.
//
static int _yr_compiler_compile_rules(YR_COMPILER* compiler)
{
  const uint8_t halt_opcode = OP_HALT;

  YR_RULE end_of_rules;
  YR_EXTERNAL_VARIABLE end_of_externals;
  YR_ARENA_REF summary_ref;
  YR_SUMMARY* summary;

  // The code section ends with a halt instruction.
  FAIL_ON_ERROR(yr_arena_write_data(
      compiler->arena,
      YR_CODE_SECTION,
      &halt_opcode,
      sizeof(halt_opcode),
      NULL));

  // The rules table ends with a null rule. Only the flags are meaningful, the
  // remaining bytes are filled with 0xFA.
  memset(&end_of_rules, 0xFA, sizeof(end_of_rules));
  end_of_rules.flags = RULE_FLAGS_NULL;

  FAIL_ON_ERROR(yr_arena_write_data(
      compiler->arena,
      YR_RULES_TABLE,
      &end_of_rules,
      sizeof(end_of_rules),
      NULL));

  // The external variables table ends with a null external. Only the type is
  // meaningful, the remaining bytes are filled with 0xFA.
  memset(&end_of_externals, 0xFA, sizeof(end_of_externals));
  end_of_externals.type = EXTERNAL_VARIABLE_TYPE_NULL;

  FAIL_ON_ERROR(yr_arena_write_data(
      compiler->arena,
      YR_EXTERNAL_VARIABLES_TABLE,
      &end_of_externals,
      sizeof(end_of_externals),
      NULL));

  // Write Aho-Corasick automaton to arena.
  FAIL_ON_ERROR(yr_ac_compile(compiler->automaton, compiler->arena));

  // The summary has no pointer fields to be registered, EOL follows the
  // reference immediately.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->arena,
      YR_SUMMARY_SECTION,
      sizeof(YR_SUMMARY),
      &summary_ref,
      EOL));

  summary = (YR_SUMMARY*) yr_arena_ref_to_ptr(compiler->arena, &summary_ref);

  summary->num_namespaces = compiler->num_namespaces;
  summary->num_rules = compiler->next_rule_idx;
  summary->num_strings = compiler->current_string_idx;

  return yr_rules_from_arena(compiler->arena, &compiler->rules);
}
690 
yr_compiler_get_rules(YR_COMPILER * compiler,YR_RULES ** rules)691 YR_API int yr_compiler_get_rules(YR_COMPILER* compiler, YR_RULES** rules)
692 {
693   // Don't allow calls to yr_compiler_get_rules() if a previous call to
694   // yr_compiler_add_XXXX failed.
695   assert(compiler->errors == 0);
696 
697   *rules = NULL;
698 
699   if (compiler->rules == NULL)
700     FAIL_ON_ERROR(_yr_compiler_compile_rules(compiler));
701 
702   *rules = compiler->rules;
703 
704   return ERROR_SUCCESS;
705 }
706 
////////////////////////////////////////////////////////////////////////////////
// Defines the external variable described by "external".
//
// A copy of the variable is appended to YR_EXTERNAL_VARIABLES_TABLE, with its
// identifier (and its value, for string variables) stored in the arena. An
// object created from the variable is also added to the compiler's objects
// table, using the variable's identifier as the key.
//
// Returns:
//   ERROR_SUCCESS                       The variable was defined.
//   ERROR_INVALID_ARGUMENT              The identifier is NULL, or the
//                                       variable is a string and its value
//                                       is NULL.
//   ERROR_DUPLICATED_EXTERNAL_VARIABLE  The objects table already contains an
//                                       object with the same identifier.
//
// Errors returned while writing to the arena, creating the object or adding
// it to the objects table are returned as well.
//
static int _yr_compiler_define_variable(
    YR_COMPILER* compiler,
    YR_EXTERNAL_VARIABLE* external)
{
  YR_EXTERNAL_VARIABLE* ext;
  YR_OBJECT* object;

  YR_ARENA_REF ext_ref;
  YR_ARENA_REF ref;

  if (external->identifier == NULL)
    return ERROR_INVALID_ARGUMENT;

  object = (YR_OBJECT*) yr_hash_table_lookup(
      compiler->objects_table, external->identifier, NULL);

  if (object != NULL)
    return ERROR_DUPLICATED_EXTERNAL_VARIABLE;

  // Arguments are fully validated before writing anything to the arena, or a
  // half-initialized variable would be left in the table when the function
  // fails because of an invalid argument.
  if (external->type == EXTERNAL_VARIABLE_TYPE_STRING &&
      external->value.s == NULL)
  {
    return ERROR_INVALID_ARGUMENT;
  }

  // The offset of the "identifier" field is passed along with the allocation
  // request.
  FAIL_ON_ERROR(yr_arena_allocate_struct(
      compiler->arena,
      YR_EXTERNAL_VARIABLES_TABLE,
      sizeof(YR_EXTERNAL_VARIABLE),
      &ext_ref,
      offsetof(YR_EXTERNAL_VARIABLE, identifier),
      EOL));

  ext = (YR_EXTERNAL_VARIABLE*) yr_arena_ref_to_ptr(compiler->arena, &ext_ref);

  FAIL_ON_ERROR(
      _yr_compiler_store_string(compiler, external->identifier, &ref));

  // The copy in the arena points to its own copy of the identifier, not to
  // the caller's string.
  ext->identifier = (const char*) yr_arena_ref_to_ptr(compiler->arena, &ref);

  ext->type = external->type;
  ext->value = external->value;

  if (external->type == EXTERNAL_VARIABLE_TYPE_STRING)
  {
    // String values are copied to the arena too, "value.s" becomes one more
    // pointer within the arena and must be registered as such.
    FAIL_ON_ERROR(_yr_compiler_store_string(compiler, external->value.s, &ref));

    FAIL_ON_ERROR(yr_arena_make_ptr_relocatable(
        compiler->arena,
        YR_EXTERNAL_VARIABLES_TABLE,
        ext_ref.offset + offsetof(YR_EXTERNAL_VARIABLE, value.s),
        EOL));

    ext->value.s = (char*) yr_arena_ref_to_ptr(compiler->arena, &ref);
  }

  FAIL_ON_ERROR(yr_object_from_external_variable(external, &object));

  // If the object can't be added to the table nobody owns it, so it must be
  // destroyed here.
  FAIL_ON_ERROR_WITH_CLEANUP(
      yr_hash_table_add(
          compiler->objects_table, external->identifier, NULL, (void*) object),
      yr_object_destroy(object));

  return ERROR_SUCCESS;
}
769 
yr_compiler_define_integer_variable(YR_COMPILER * compiler,const char * identifier,int64_t value)770 YR_API int yr_compiler_define_integer_variable(
771     YR_COMPILER* compiler,
772     const char* identifier,
773     int64_t value)
774 {
775   YR_EXTERNAL_VARIABLE external;
776 
777   external.type = EXTERNAL_VARIABLE_TYPE_INTEGER;
778   external.identifier = identifier;
779   external.value.i = value;
780 
781   FAIL_ON_ERROR(_yr_compiler_define_variable(compiler, &external));
782 
783   return ERROR_SUCCESS;
784 }
785 
yr_compiler_define_boolean_variable(YR_COMPILER * compiler,const char * identifier,int value)786 YR_API int yr_compiler_define_boolean_variable(
787     YR_COMPILER* compiler,
788     const char* identifier,
789     int value)
790 {
791   YR_EXTERNAL_VARIABLE external;
792 
793   external.type = EXTERNAL_VARIABLE_TYPE_BOOLEAN;
794   external.identifier = identifier;
795   external.value.i = value;
796 
797   FAIL_ON_ERROR(_yr_compiler_define_variable(compiler, &external));
798 
799   return ERROR_SUCCESS;
800 }
801 
yr_compiler_define_float_variable(YR_COMPILER * compiler,const char * identifier,double value)802 YR_API int yr_compiler_define_float_variable(
803     YR_COMPILER* compiler,
804     const char* identifier,
805     double value)
806 {
807   YR_EXTERNAL_VARIABLE external;
808 
809   external.type = EXTERNAL_VARIABLE_TYPE_FLOAT;
810   external.identifier = identifier;
811   external.value.f = value;
812 
813   FAIL_ON_ERROR(_yr_compiler_define_variable(compiler, &external));
814 
815   return ERROR_SUCCESS;
816 }
817 
yr_compiler_define_string_variable(YR_COMPILER * compiler,const char * identifier,const char * value)818 YR_API int yr_compiler_define_string_variable(
819     YR_COMPILER* compiler,
820     const char* identifier,
821     const char* value)
822 {
823   YR_EXTERNAL_VARIABLE external;
824 
825   external.type = EXTERNAL_VARIABLE_TYPE_STRING;
826   external.identifier = identifier;
827   external.value.s = (char*) value;
828 
829   FAIL_ON_ERROR(_yr_compiler_define_variable(compiler, &external));
830 
831   return ERROR_SUCCESS;
832 }
833 
yr_compiler_get_error_message(YR_COMPILER * compiler,char * buffer,int buffer_size)834 YR_API char* yr_compiler_get_error_message(
835     YR_COMPILER* compiler,
836     char* buffer,
837     int buffer_size)
838 {
839   uint32_t max_strings_per_rule;
840 
841   switch (compiler->last_error)
842   {
843   case ERROR_INSUFFICIENT_MEMORY:
844     snprintf(buffer, buffer_size, "not enough memory");
845     break;
846   case ERROR_DUPLICATED_IDENTIFIER:
847     snprintf(
848         buffer,
849         buffer_size,
850         "duplicated identifier \"%s\"",
851         compiler->last_error_extra_info);
852     break;
853   case ERROR_DUPLICATED_STRING_IDENTIFIER:
854     snprintf(
855         buffer,
856         buffer_size,
857         "duplicated string identifier \"%s\"",
858         compiler->last_error_extra_info);
859     break;
860   case ERROR_DUPLICATED_TAG_IDENTIFIER:
861     snprintf(
862         buffer,
863         buffer_size,
864         "duplicated tag identifier \"%s\"",
865         compiler->last_error_extra_info);
866     break;
867   case ERROR_DUPLICATED_META_IDENTIFIER:
868     snprintf(
869         buffer,
870         buffer_size,
871         "duplicated metadata identifier \"%s\"",
872         compiler->last_error_extra_info);
873     break;
874   case ERROR_DUPLICATED_LOOP_IDENTIFIER:
875     snprintf(
876         buffer,
877         buffer_size,
878         "duplicated loop identifier \"%s\"",
879         compiler->last_error_extra_info);
880     break;
881   case ERROR_UNDEFINED_STRING:
882     snprintf(
883         buffer,
884         buffer_size,
885         "undefined string \"%s\"",
886         compiler->last_error_extra_info);
887     break;
888   case ERROR_UNDEFINED_IDENTIFIER:
889     snprintf(
890         buffer,
891         buffer_size,
892         "undefined identifier \"%s\"",
893         compiler->last_error_extra_info);
894     break;
895   case ERROR_UNREFERENCED_STRING:
896     snprintf(
897         buffer,
898         buffer_size,
899         "unreferenced string \"%s\"",
900         compiler->last_error_extra_info);
901     break;
902   case ERROR_EMPTY_STRING:
903     snprintf(
904         buffer,
905         buffer_size,
906         "empty string \"%s\"",
907         compiler->last_error_extra_info);
908     break;
909   case ERROR_NOT_A_STRUCTURE:
910     snprintf(
911         buffer,
912         buffer_size,
913         "\"%s\" is not a structure",
914         compiler->last_error_extra_info);
915     break;
916   case ERROR_NOT_INDEXABLE:
917     snprintf(
918         buffer,
919         buffer_size,
920         "\"%s\" is not an array or dictionary",
921         compiler->last_error_extra_info);
922     break;
923   case ERROR_NOT_A_FUNCTION:
924     snprintf(
925         buffer,
926         buffer_size,
927         "\"%s\" is not a function",
928         compiler->last_error_extra_info);
929     break;
930   case ERROR_INVALID_FIELD_NAME:
931     snprintf(
932         buffer,
933         buffer_size,
934         "invalid field name \"%s\"",
935         compiler->last_error_extra_info);
936     break;
937   case ERROR_MISPLACED_ANONYMOUS_STRING:
938     snprintf(buffer, buffer_size, "wrong use of anonymous string");
939     break;
940   case ERROR_INCLUDES_CIRCULAR_REFERENCE:
941     snprintf(buffer, buffer_size, "include circular reference");
942     break;
943   case ERROR_INCLUDE_DEPTH_EXCEEDED:
944     snprintf(buffer, buffer_size, "too many levels of included rules");
945     break;
946   case ERROR_LOOP_NESTING_LIMIT_EXCEEDED:
947     snprintf(buffer, buffer_size, "loop nesting limit exceeded");
948     break;
949   case ERROR_NESTED_FOR_OF_LOOP:
950     snprintf(
951         buffer,
952         buffer_size,
953         "'for <quantifier> of <string set>' loops can't be nested");
954     break;
955   case ERROR_UNKNOWN_MODULE:
956     snprintf(
957         buffer,
958         buffer_size,
959         "unknown module \"%s\"",
960         compiler->last_error_extra_info);
961     break;
962   case ERROR_INVALID_MODULE_NAME:
963     snprintf(
964         buffer,
965         buffer_size,
966         "invalid module name \"%s\"",
967         compiler->last_error_extra_info);
968     break;
969   case ERROR_DUPLICATED_STRUCTURE_MEMBER:
970     snprintf(buffer, buffer_size, "duplicated structure member");
971     break;
972   case ERROR_WRONG_ARGUMENTS:
973     snprintf(
974         buffer,
975         buffer_size,
976         "wrong arguments for function \"%s\"",
977         compiler->last_error_extra_info);
978     break;
979   case ERROR_WRONG_RETURN_TYPE:
980     snprintf(buffer, buffer_size, "wrong return type for overloaded function");
981     break;
982   case ERROR_INVALID_HEX_STRING:
983   case ERROR_INVALID_REGULAR_EXPRESSION:
984   case ERROR_SYNTAX_ERROR:
985   case ERROR_WRONG_TYPE:
986     snprintf(buffer, buffer_size, "%s", compiler->last_error_extra_info);
987     break;
988   case ERROR_INTERNAL_FATAL_ERROR:
989     snprintf(buffer, buffer_size, "internal fatal error");
990     break;
991   case ERROR_DIVISION_BY_ZERO:
992     snprintf(buffer, buffer_size, "division by zero");
993     break;
994   case ERROR_REGULAR_EXPRESSION_TOO_LARGE:
995     snprintf(buffer, buffer_size, "regular expression is too large");
996     break;
997   case ERROR_REGULAR_EXPRESSION_TOO_COMPLEX:
998     snprintf(buffer, buffer_size, "regular expression is too complex");
999     break;
1000   case ERROR_TOO_MANY_STRINGS:
1001     yr_get_configuration(YR_CONFIG_MAX_STRINGS_PER_RULE, &max_strings_per_rule);
1002     snprintf(
1003         buffer,
1004         buffer_size,
1005         "too many strings in rule \"%s\" (limit: %d)",
1006         compiler->last_error_extra_info,
1007         max_strings_per_rule);
1008     break;
1009   case ERROR_INTEGER_OVERFLOW:
1010     snprintf(
1011         buffer,
1012         buffer_size,
1013         "integer overflow in \"%s\"",
1014         compiler->last_error_extra_info);
1015     break;
1016   case ERROR_COULD_NOT_READ_FILE:
1017     snprintf(buffer, buffer_size, "could not read file");
1018     break;
1019   case ERROR_INVALID_MODIFIER:
1020     snprintf(buffer, buffer_size, "%s", compiler->last_error_extra_info);
1021     break;
1022   case ERROR_DUPLICATED_MODIFIER:
1023     snprintf(buffer, buffer_size, "duplicated modifier");
1024     break;
1025   }
1026 
1027   return buffer;
1028 }
1029