1 /* Jitter: VM-independent mutable routine data structures: header.
2 
3    Copyright (C) 2016, 2017, 2018, 2019 Luca Saiu
4    Updated in 2020 by Luca Saiu
5    Written by Luca Saiu
6 
7    This file is part of Jitter.
8 
9    Jitter is free software: you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation, either version 3 of the License, or
12    (at your option) any later version.
13 
14    Jitter is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with Jitter.  If not, see <http://www.gnu.org/licenses/>. */
21 
22 
23 #ifndef JITTER_MUTABLE_ROUTINE_H_
24 #define JITTER_MUTABLE_ROUTINE_H_
25 
26 #include <jitter/jitter.h>
27 #include <jitter/jitter-dynamic-buffer.h>
28 #include <jitter/jitter-hash.h>
29 #include <jitter/jitter-instruction.h>
30 #include <jitter/jitter-print.h>
31 
32 
33 /* Routine data structures.
34  * ************************************************************************** */
35 
36 /* The stage a mutable routine is currently in.  FIXME: document each case. */
37 enum jitter_mutable_routine_stage
38   {
39     jitter_routine_stage_unspecialized,
40     jitter_routine_stage_specialized,
41     jitter_routine_stage_replicated
42   };
43 
44 /* A descriptor for a replicated block of code. */
45 struct jitter_replicated_block
46 {
47   /* The opcode of the specialized instruction the block is a translation of.
48      This is actually an enum vmprefix_specialized_instruction_opcode , but
49      since the fixnum type is wide enough to represent it on any VM we can
50      define this once and for all. */
51   //enum vmprefix_specialized_instruction_opcode specialized_opcode;
52   jitter_uint specialized_opcode;
53 
54   /* A pointer to the beginning of the native code which is the translation of
55      one specific instance of a specialized instruction; the native code may
56      also include (at the beginning) some instructions to load residual
57      arguments.
58      The pointer is NULL as long as the code has not been replicated.
59      When non-NULL this field points into the same native_code block pointed to
60      by a program, so the referred memory must not be freed separately. */
61   char *native_code;
62 
63   /* The native code size, in bytes.  This is not necessarily the same as the
64      specialized_opcode-th element of vmprefix_thread_sizes, again because of
65      the added code to load residuals.
66      The size is 0 as long as the block has not been replicated. */
67   size_t native_code_size;
68 };
69 
70 /* Options selectively applied to a Jittery mutable routine, to be selected by
71    the user.
72    This data structure, like a program data structure, should be considered an
73    opaque abstract type, and the user should only use the functions declared
74    below to set options. */
75 struct jitter_mutable_routine_options
76 {
77   /* Non-false if the other options can still be changed.  This is true at
78      initialization, and becomes false as soon as the user appends the first
79      label or instruction. */
80   bool can_change;
81 
82   /* False if specialization is allowed to generate specialized instructions
83      using fast registers, as would be normal in production runs.  If non-false,
84      whenever a register parameter is appended to an instruction, the actual
85      register index to be used is changed to be equal to the given index summed
86      to the number of fast registers in the class.
87      This option is designed for benchmarking, in order to compare with
88      alternative VMs which should behave like a Jittery VM with this option on,
89      up to slow register index shifting (never needed in a Jittery VM). */
90   bool slow_registers_only;
91 
92   /* False if specialization is allowed to generate specialized instructions
93      with non-residual immediate arguments, as would be normal in production
94      runs.  If non-false always residualize literal arguments (except,
95      currently, for fast labels).
96      This option is designed for benchmarking, in order to compare with
97      alternative VMs which should behave like a Jittery VM with this option
98      on, up to fast branches (only available, and faster, in a Jittery VM). */
99   bool slow_literals_only;
100 
101   /* If non-false then automatically add a final "exitvm" instruction at the end
102      of each VM routine; otherwise add a final "unreachable" instruction
103      instead, which saves memory and makes replicated code smaller, but assumes
104      that the unreachable instruction is actually unreachable.  This is true by
105      default. */
106   bool add_final_exitvm;
107 
108   /* If non-false then enable optimization rewriting.  The option is designed
109      for debugging of rewrite rules and for benchmarking, particularly when
110      comparing a Jittery VM with an alternative not supporting rewriting.
111      This is true by default. */
112   bool optimization_rewriting;
113 };
114 
115 /* This is enough for declaring pointers to structs of this type here.  See
116    jitter-specialize.h for an actual definition, and a description of the
117    idea. */
118 struct jitter_executable_routine;
119 
120 /* The internal representation of a program being edited by the user.  This
121    should be considered an abstract type, as the internal structure is subject
122    to change. */
123 struct jitter_mutable_routine
124 {
125   /* The program stage at the present time. */
126   enum jitter_mutable_routine_stage stage;
127 
128   /* The options applying to this program.  This field is initialized to default
129      values at program initialization, and should only be updated indirectly,
130      thru the functions in the "Mutable routine options" section below. */
131   struct jitter_mutable_routine_options options;
132 
133   /* A dynamic array containing struct jitter_instruction * elements.  This is
134      filled by parsing or by initialization with unspecialized instructions.
135      The buffer contains pointers rather than directly instructions to make
136      rewriting easier. */
137   struct jitter_dynamic_buffer instructions;
138 
139   /* The opaque label to be generated as fresh at the next request. */
140   jitter_label next_unused_opaque_label;
141 
142   /* A map associating symbolic label names to opaque labels.  There can be
143      opaque labels without any symbolic names, but every symbolic name in use
144      must always be associated to an opaque label.  The datum is handled as a
145      pointer by the hash API, but here it should just be cast to and from a
146      jitter_label .  Symbolic label names are copied into the map
147      whenever a binding is added, and do not share memory with user data
148      structures. */
149   struct jitter_hash_table label_name_to_opaque_label;
150 
151   /* A dynamic array containing jitter_int elements.  Each array index
152      represents an opaque label, and its associated array element the
153      unspecialized instruction index where that label leads, or -1 if the
154      label is unresolved.  It is not an error to have unresolved labels,
155      even late after label resolution, as long as such labels are never
156      used in instruction parameters.
157      Rewriting poses no particular problem with respect to this mapping:
158      once a label is assigned an instruction index, the *following*
159      instructions may be rewritten, but not the previous one, keeping
160      the existing mapping valid. */
161   struct jitter_dynamic_buffer opaque_label_to_instruction_index;
162 
163   /* A pointer to the instruction currently being initialized within
164      instructions. */
165   struct jitter_instruction *current_instruction;
166 
167   /* The next uninitialized parameter of *current_instruction . */
168   struct jitter_parameter *next_uninitialized_parameter;
169 
170   /* The next expected parameter type, which *next_uninitialized_parameter will
171      need to match. */
172   const struct jitter_meta_instruction_parameter_type *
173   next_expected_parameter_type;
174 
175   /* How many parameters are we still expecting before completing the
176      instruction which is currently being appended.  If no instruction is
177      incomplete, including right after initialization, the field is zero. */
178   int expected_parameter_no;
179 
180   /* How many complete instructions at the end of the program are candidates for
181      rewriting.  This starts at zero, and gets reset every time a label is
182      added. */
183   size_t rewritable_instruction_no;
184 
185   /* A pointer to a malloced array of booleans, having the same size as the
186      number of instructions in the program.  Each element of the array is true
187      if and only if the corresponding program instruction is a jump target.
188      This is allocated at specialization time, and NULL before. */
189   bool *jump_targets;
190 
191   /* We need to map unspecialized instruction indices into specialized
192      instruction offsets within specialized_program, in chars from the
193      beginning.  This is needed because specialized instructions have variable
194      sizes, and label arguments need to be backpatched in to point to
195      instruction beginning addresses rather than indices.
196      Notice that this array is indexed by unspecialized instruction indices, and
197      that not every possible index is valid: if an unspecialized instruction
198      interval is specialized into one superinstruction then only its first
199      unspecialized instruction in the interval has a corresponding
200      superinstruction. */
201   jitter_int *instruction_index_to_specialized_instruction_offset;
202 
203   /* The sequence of replicated blocks in the specialized program, in order.
204      Each element is a struct jitter_replicated_block .  This is only used with
205      replication, and for disassembly. */
206   struct jitter_dynamic_buffer replicated_blocks;
207 
208   /* Generated native code and its size, only used with replication [FIXME:
209      generalize later, after I introduce alternatives even more sophisticated
210      than replication, which will probably still need this]. */
211   char *native_code;
212   size_t native_code_size;
213 
214   /* The specialized program.  Each element is a union jitter_specialized_word . */
215   struct jitter_dynamic_buffer specialized_program;
216 
217   /* The indices, as jitter_int's, of label parameters within
218      specialized_instructions (NOTE(review): presumably the specialized_program
219      field above -- confirm), to be patched at the end of specialization.
220      Until then label parameters are instruction indices; after patching they
221      become pointers to the beginning of instructions within that buffer. */
222   struct jitter_dynamic_buffer specialized_label_indices;
223 
224   /* The number of slow registers needed *per class* in order to run this
225      program; or, alternatively, the maximum number of slow registers needed to
226      run this program in any class.  Slow registers are always added in the same
227      number for all classes, even if each class may have a different number of
228      fast registers. */
229   jitter_int slow_register_per_class_no;
230 
231   /* The executable version of this routine, if one exists.  NULL before the
232      executable routine is made. */
233   struct jitter_executable_routine *executable_routine;
234 
235   /* A pointer to the VM-specific definitions of the VM for this program. */
236   const struct jitter_vm *vm;
237 };
238 
239 /* Return the number of unspecialized instructions in the pointed mutable
240    routine, also counting the one which is currently incomplete, if any. */
241 size_t
242 jitter_mutable_routine_instruction_no (const struct jitter_mutable_routine *p)
243   __attribute__ ((pure));
244 
245 /* Return a freshly-allocated, empty mutable routine; the result is never NULL.
246    NOTE(review): presumably vm becomes the result's vm field -- confirm. */
247 struct jitter_mutable_routine*
248 jitter_make_mutable_routine (const struct jitter_vm *vm)
249   __attribute__ ((nonnull (1), returns_nonnull));
250 
251 /* Destroy the pointed mutable routine; do nothing if p is NULL. */
252 void
253 jitter_destroy_mutable_routine (struct jitter_mutable_routine *p);
254 
255 
256 
257 
258 /* Mutable routine options.
259  * ************************************************************************** */
260 
261 /* The functions in this section set some user options for an existing program.
262    Such settings are only possible on an "empty" mutable routine, before the
263    first instruction or label is appended to it. */
264 
265 /* Set the slow_registers_only option of the pointed mutable routine to the
266    given value.  Fail fatally if the option is no longer settable. */
267 void
268 jitter_set_mutable_routine_option_slow_registers_only
269    (struct jitter_mutable_routine *p, bool option)
270   __attribute__ ((nonnull (1)));
271 
272 /* Set the slow_literals_only option to the given value in the pointed mutable
273    routine.  Fail fatally if the option is no longer settable. */
274 void
275 jitter_set_mutable_routine_option_slow_literals_only
276    (struct jitter_mutable_routine *p, bool option)
277   __attribute__ ((nonnull (1)));
278 
279 /* A convenience function equivalent to a call to
280    jitter_set_mutable_routine_option_slow_registers_only followed by a call to
281    jitter_set_mutable_routine_option_slow_literals_only on the same mutable
282    routine with the same option value. */
283 void
284 jitter_set_mutable_routine_option_slow_literals_and_registers_only
285    (struct jitter_mutable_routine *p, bool option)
286   __attribute__ ((nonnull (1)));
287 
288 /* Set the add_final_exitvm option of the pointed mutable routine to the given
289    value.  Fail fatally if the option is no longer settable. */
290 void
291 jitter_set_mutable_routine_option_add_final_exitvm
292    (struct jitter_mutable_routine *p, bool option)
293   __attribute__ ((nonnull (1)));
294 
295 /* Set the optimization_rewriting option of the pointed mutable routine to the
296    given value.  Fail fatally if the option is no longer settable. */
297 void
298 jitter_set_mutable_routine_option_optimization_rewriting
299    (struct jitter_mutable_routine *p, bool option)
300   __attribute__ ((nonnull (1)));
301 
302 
303 
304 
305 /* Label handling.
306  * ************************************************************************** */
307 
308 /* Return a fresh label for the pointed mutable routine, with no associated
309    symbolic name.  The caller may later use the label in an instruction argument
310    or associate it to a program point. */
311 jitter_label
312 jitter_fresh_label (struct jitter_mutable_routine *p)
313   __attribute__ ((nonnull (1)));
314 
315 /* Return a label for the pointed mutable routine, associated to the given
316    symbolic name.  If the symbolic name is new for the routine, associate the
317    label to an internally-allocated copy of it; if the symbolic name is already
318    known, return the label already associated to it. */
319 jitter_label
320 jitter_symbolic_label (struct jitter_mutable_routine *p,
321                        const char *symbolic_name)
322   __attribute__ ((nonnull (1)));
323 
324 
325 
326 
327 /* Mutable routine construction API.
328  * ************************************************************************** */
329 
330 /* Update the pointed mutable routine, adding the given opaque label before the
331    instruction which is coming next.
332    When this function is called the previous instruction, if any, must have been
333    completed. */
334 void
335 jitter_mutable_routine_append_label (struct jitter_mutable_routine *p,
336                                      jitter_label label)
337   __attribute__ ((nonnull (1)));
338 
339 /* Update the pointed mutable routine, adding a label with the given name before
340    the instruction which is coming next, and return that label.
341    When this function is called the previous instruction, if any, must have been
342    completed. */
343 jitter_label
344 jitter_mutable_routine_append_symbolic_label (struct jitter_mutable_routine *p,
345                                               const char *label_name)
346   __attribute__ ((nonnull (1, 2)));
347 
348 /* Update the pointed mutable routine, beginning a new instruction with the
349    given name, to be looked up in the meta-instruction hash table; the
350    instruction parameters, if any, have to be supplied with calls to
351    vmprefix_mutable_routine_append_*_parameter .  When this function is called
352    the previous instruction, if any, must have been completed.
353 
354    This function is convenient but requires a hash lookup on the name;
355    jitter_mutable_routine_append_meta_instruction is faster: see its comment for
356    the recommended way to use it. */
357 void
358 jitter_mutable_routine_append_instruction_name (struct jitter_mutable_routine *p,
359                                                 const char *instruction_name);
360 
361 /* Update the pointed mutable routine, beginning a new instruction with the
362    given unspecialized opcode, from the given array of meta-instructions.  When
363    this function is called the previous instruction, if any, must have been
364    completed.  The unspecialized_opcode argument is meant to be one case of enum
365    vmprefix_meta_instruction_id ; the enum name depends on the VM, which is why
366    this VM-independent prototype uses a generic integer type.
367 
368    The recommended way of using this function is via the VM-specific macro
369    wrapper VMPREFIX_MUTABLE_ROUTINE_APPEND_INSTRUCTION_ID , which lets the user
370    ignore the meta-instruction array and the number of meta-instructions.
371 
372    This function is flexible enough to use from user code where the instruction
373    to be added is the result of some previous computation; the macro API
374    recommended in the comment before
375    jitter_mutable_routine_append_meta_instruction doesn't fit the problem as
376    the instruction name would need to be supplied as a literal, and the
377    string-based API above would require a useless run-time hash lookup. */
378 void
379 jitter_mutable_routine_append_instruction_id
380    (struct jitter_mutable_routine *p,
381     const struct jitter_meta_instruction * const mis,
382     size_t meta_instruction_no,
383     unsigned unspecialized_opcode);
384 
385 /* Update the pointed mutable routine, beginning a new instruction which is an
386    instance of the pointed meta-instruction; the instruction parameters, if any,
387    have to be supplied with calls to vmprefix_append_*_parameter .  When this
388    function is called the previous instruction, if any, must have been
389    completed.
390 
391    Supplying a meta-instruction pointer from a user program is very
392    inconvenient.  The recommended way of using this function, which is more
393    efficient than jitter_mutable_routine_append_instruction_name, is thru the
394    machine-generated macro [VMPREFIX]_APPEND_INSTRUCTION. */
395 void
396 jitter_mutable_routine_append_meta_instruction
397    (struct jitter_mutable_routine *p,
398     const struct jitter_meta_instruction * const mi);
399 
400 /* Update the pointed mutable routine, adding one more parameter
401    (left-to-right) to the unspecialized instruction currently being described.
402    Fail fatally if there are no instructions yet, or the last added instruction
403    is already complete.  Only the symbolic-label variant returns a value: the
404    opaque label identifier for the given name.
405    Notice that the macro [VMPREFIX]_APPEND_REGISTER_PARAMETER provides a more
406    convenient way of adding a register parameter. */
407 void
408 jitter_mutable_routine_append_literal_parameter
409    (struct jitter_mutable_routine *p,
410     union jitter_word immediate)
411   __attribute__((nonnull (1)));
412 void
413 jitter_mutable_routine_append_signed_literal_parameter
414    (struct jitter_mutable_routine *p,
415     jitter_int immediate)
416   __attribute__((nonnull (1)));
417 void
418 jitter_mutable_routine_append_unsigned_literal_parameter
419    (struct jitter_mutable_routine *p,
420     jitter_uint immediate)
421   __attribute__((nonnull (1)));
422 void
423 jitter_mutable_routine_append_pointer_literal_parameter
424    (struct jitter_mutable_routine *p,
425     void *immediate)
426   __attribute__((nonnull (1)));
427 void
428 jitter_mutable_routine_append_register_parameter
429    (struct jitter_mutable_routine *p,
430     const struct jitter_register_class *c,
431     jitter_register_index register_index)
432   __attribute__((nonnull (1, 2)));
433 jitter_label
434 jitter_mutable_routine_append_symbolic_label_parameter
435    (struct jitter_mutable_routine *p,
436     const char *label_name)
437   __attribute__((nonnull (1, 2)));
438 void
439 jitter_mutable_routine_append_label_parameter
440    (struct jitter_mutable_routine *p,
441     jitter_label label)
442   __attribute__((nonnull (1)));
443 
444 
445 
446 
447 /* Lower-level program-construction API.
448  * ************************************************************************** */
449 
450 /* This is used internally for rewriting. */
451 
452 /* Append the pointed instruction itself, without cloning it, to the pointed
453    mutable routine.  When this function is called the previous instruction, if
454    any, must have been completed. */
455 void
456 jitter_mutable_routine_append_instruction (struct jitter_mutable_routine *p,
457                                            const struct jitter_instruction *ip)
458   __attribute__ ((nonnull (1, 2)));
459 
460 /* Append a copy of the pointed parameter, without destroying it, to the current
461    instruction of the pointed mutable routine.
462 
463    Rationale: making a copy is more convenient in the current implementation
464    than replacing an existing data structure, as the structure is pre-allocated.
465    The fact that this function is called in machine-generated code makes
466    convenience only a secondary consideration.  Moreover rewrite rules may copy
467    the same parameter more than once from an instruction to be rewritten into a
468    template. */
469 void
470 jitter_mutable_routine_append_parameter_copy (struct jitter_mutable_routine *p,
471                                               const struct jitter_parameter *pp)
472   __attribute__ ((nonnull (1, 2)));
473 
474 
475 
476 
477 /* Unspecialized program printer.
478  * ************************************************************************** */
479 
480 /* Print a readable representation of the pointed mutable routine to the
481    given print context.
482 
483    The output uses the following class names (see jitter/jitter-print.h), with
484    "vmprefix" replaced by the lower-case name of the VM for the mutable routine:
485    - "vmprefix-punctuation";
486    - "vmprefix-label";
487    - "vmprefix-instruction";
488    - "vmprefix-register";
489    - "vmprefix-number";
490    - "vmprefix-invalid".
491    Some VMs will print instruction literals through custom printers, which may
492    use other classes. */
493 void
494 jitter_mutable_routine_print (jitter_print_context ctx,
495                               const struct jitter_mutable_routine *p);
496 
497 
498 
499 
500 /* Jump target computation on unspecialized routines.
501  * ************************************************************************** */
502 
503 /* Given a routine return a pointer to a new array of booleans, allocated with
504    malloc, having the same size as the number of instructions in the routine.
505    Each element of the array is true if and only if the corresponding routine
506    instruction is a jump target.  The result is never NULL.
507 
508    This is used at specialization time to compute the jump_targets field of a
509    struct jitter_mutable_routine , but also elsewhere, for printing
510    unspecialized routines -- therefore it cannot be a static function.
511 
512    This function is used internally, and the user does not need to see it. */
513 bool*
514 jitter_mutable_routine_jump_targets (const struct jitter_mutable_routine *p)
515   __attribute__ ((returns_nonnull, nonnull (1)));
516 
517 
518 
519 
520 /* Label resolution in unspecialized routines.
521  * ************************************************************************** */
522 
523 /* Resolve label arguments in unspecialized instruction parameters, replacing
524    opaque labels with unspecialized instruction indices.  After this is done
525    instruction parameters refer to labels as unspecialized instruction indices.
526    Fail fatally if any referred label is still undefined, or if the routine is
527    not unspecialized.
528 
529    This function is used internally, and the user does not need to see it. */
530 void
531 jitter_mutable_routine_resolve_labels (struct jitter_mutable_routine *p)
532   __attribute__ ((nonnull (1)));
533 
534 #endif // #ifndef JITTER_MUTABLE_ROUTINE_H_
535