/**************************************************************************/
/*                                                                        */
/*                                 OCaml                                  */
/*                                                                        */
/*            Mark Shinwell and Leo White, Jane Street Europe             */
/*                                                                        */
/*   Copyright 2013--2016, Jane Street Group, LLC                         */
/*                                                                        */
/*   All rights reserved.  This file is distributed under the terms of    */
/*   the GNU Lesser General Public License version 2.1, with the          */
/*   special exception on linking described in the file LICENSE.          */
/*                                                                        */
/**************************************************************************/

#define CAML_INTERNALS

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#include "caml/config.h"
#ifdef HAS_UNISTD
#include <unistd.h>
#endif

#include "caml/alloc.h"
#include "caml/backtrace_prim.h"
#include "caml/fail.h"
#include "caml/gc.h"
#include "caml/intext.h"
#include "caml/major_gc.h"
#include "caml/memory.h"
#include "caml/minor_gc.h"
#include "caml/misc.h"
#include "caml/mlvalues.h"
#include "caml/osdeps.h"
#include "caml/roots.h"
#include "caml/signals.h"
#include "caml/stack.h"
#include "caml/sys.h"
#include "caml/spacetime.h"

#ifdef WITH_SPACETIME

/* We force "noinline" in certain places to be sure we know how many
   frames there will be on the stack. */
#define NOINLINE __attribute__((noinline))

#ifdef HAS_LIBUNWIND
#define UNW_LOCAL_ONLY
#include "libunwind.h"
#endif

static int automatic_snapshots = 0;
static double snapshot_interval = 0.0;
static double next_snapshot_time = 0.0;
static struct channel *snapshot_channel;
static int pid_when_snapshot_channel_opened;

extern value caml_spacetime_debug(value);

static char* start_of_free_node_block;
static char* end_of_free_node_block;

typedef struct per_thread {
  value* trie_node_root;
  value* finaliser_trie_node_root;
  struct per_thread* next;
} per_thread;

/* List of tries corresponding to threads that have been created. */
/* CR-soon mshinwell: just include the main trie in this list. */
static per_thread* per_threads = NULL;
static int num_per_threads = 0;

/* [caml_spacetime_shapes] is defined in the startup file. */
extern uint64_t* caml_spacetime_shapes;

uint64_t** caml_spacetime_static_shape_tables = NULL;
shape_table* caml_spacetime_dynamic_shape_tables = NULL;

static uintnat caml_spacetime_profinfo = (uintnat) 0;

value caml_spacetime_trie_root = Val_unit;
value* caml_spacetime_trie_node_ptr = &caml_spacetime_trie_root;

static value caml_spacetime_finaliser_trie_root_main_thread = Val_unit;
value* caml_spacetime_finaliser_trie_root
  = &caml_spacetime_finaliser_trie_root_main_thread;

/* CR-someday mshinwell: think about thread safety of the manipulation of
   this list for multicore */
allocation_point* caml_all_allocation_points = NULL;

static const uintnat chunk_size = 1024 * 1024;
static void reinitialise_free_node_block(void)
{
  size_t index;

  start_of_free_node_block = (char*) malloc(chunk_size);
  if (start_of_free_node_block == NULL) {
    fprintf(stderr, "Out of memory whilst allocating node block\n");
    abort();
  }
  end_of_free_node_block = start_of_free_node_block + chunk_size;

  for (index = 0; index < chunk_size / sizeof(value); index++) {
    ((value*) start_of_free_node_block)[index] = Val_unit;
  }
}
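
/* Trie nodes are carved out of the current chunk by a simple bump-pointer
   scheme: [start_of_free_node_block] advances by the request size, and a
   fresh chunk is allocated once the current one is exhausted (any unused
   tail of the old chunk is abandoned).  A minimal sketch of the pattern
   shared by [allocate_uninitialized_ocaml_node] and [allocate_c_node]
   below; the name [alloc_from_node_block] is illustrative only:

     static void* alloc_from_node_block(size_t size_in_bytes)
     {
       void* p = (void*) start_of_free_node_block;
       if (end_of_free_node_block - start_of_free_node_block
             < size_in_bytes) {
         reinitialise_free_node_block();
         p = (void*) start_of_free_node_block;
       }
       start_of_free_node_block += size_in_bytes;
       return p;
     }
*/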

#ifndef O_BINARY
#define O_BINARY 0
#endif

#if defined (_WIN32) || defined (_WIN64)
extern value val_process_id;
#endif

static uint32_t version_number = 0;
static uint32_t magic_number_base = 0xace00ace;

static void caml_spacetime_write_magic_number_internal(struct channel* chan)
{
  value magic_number =
    Val_long(((uint64_t) magic_number_base)
             | (((uint64_t) version_number) << 32));

  Lock(chan);
  caml_output_val(chan, magic_number, Val_long(0));
  Unlock(chan);
}
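
/* The magic number is marshalled as a single OCaml integer: the base
   0xace00ace in the low 32 bits and [version_number] in the high 32 bits.
   A reader can split it back apart as follows (a sketch only;
   [decode_magic] is not part of this file):

     static void decode_magic(int64_t magic,
                              uint32_t* base, uint32_t* version)
     {
       *base = (uint32_t) (magic & 0xffffffffULL);
       *version = (uint32_t) (magic >> 32);
     }
*/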

CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
  caml_spacetime_write_magic_number_internal(Channel(v_channel));
  return Val_unit;
}

static char* automatic_snapshot_dir;

static void open_snapshot_channel(void)
{
  int fd;
  char filename[8192];
  int pid;
#if defined (_WIN32) || defined (_WIN64)
  pid = Int_val(val_process_id);
#else
  pid = getpid();
#endif
  snprintf(filename, 8192, "%s/spacetime-%d", automatic_snapshot_dir, pid);
  filename[8191] = '\0';
  fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
  if (fd == -1) {
    automatic_snapshots = 0;
  }
  else {
    snapshot_channel = caml_open_descriptor_out(fd);
    snapshot_channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
    pid_when_snapshot_channel_opened = pid;
    caml_spacetime_write_magic_number_internal(snapshot_channel);
  }
}
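
/* For example, with a snapshot directory of "/tmp" and a process ID of
   1234, snapshots are written to "/tmp/spacetime-1234".  If the file
   cannot be opened, automatic snapshots are silently disabled. */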

static void maybe_reopen_snapshot_channel(void)
{
  /* This function should be used before writing to the automatic snapshot
     channel.  It detects whether we have forked since the channel was opened.
     If so, we close the old channel (ignoring any errors just in case the
     old fd has been closed, e.g. in a double-fork situation where the middle
     process has a loop to manually close all fds and no Spacetime snapshot
     was written during that time) and then open a new one. */

  int pid;
#if defined (_WIN32) || defined (_WIN64)
  pid = Int_val(val_process_id);
#else
  pid = getpid();
#endif

  if (pid != pid_when_snapshot_channel_opened) {
    caml_close_channel(snapshot_channel);
    open_snapshot_channel();
  }
}

extern void caml_spacetime_automatic_save(void);

void caml_spacetime_initialize(void)
{
  /* Note that this is called very early (even prior to GC initialisation). */

  char *ap_interval;

  reinitialise_free_node_block();

  caml_spacetime_static_shape_tables = &caml_spacetime_shapes;

  ap_interval = caml_secure_getenv ("OCAML_SPACETIME_INTERVAL");
  if (ap_interval != NULL) {
    unsigned int interval = 0;
    sscanf(ap_interval, "%u", &interval);
    if (interval != 0) {
      double time;
      char cwd[4096];
      char* user_specified_automatic_snapshot_dir;
      int dir_ok = 1;

      user_specified_automatic_snapshot_dir =
        caml_secure_getenv("OCAML_SPACETIME_SNAPSHOT_DIR");

      if (user_specified_automatic_snapshot_dir == NULL) {
#ifdef HAS_GETCWD
        if (getcwd(cwd, sizeof(cwd)) == NULL) {
          dir_ok = 0;
        }
#else
        if (getwd(cwd) == NULL) {
          dir_ok = 0;
        }
#endif
        if (dir_ok) {
          automatic_snapshot_dir = strdup(cwd);
        }
      }
      else {
        automatic_snapshot_dir =
          strdup(user_specified_automatic_snapshot_dir);
      }

      if (dir_ok) {
        automatic_snapshots = 1;
        open_snapshot_channel();
        if (automatic_snapshots) {
#ifdef SIGINT
          /* Catch interrupt so that the profile can be completed.
             We do this by marking the signal as handled without
             specifying an actual handler. This causes the signal
             to be handled by a call to exit. */
          caml_set_signal_action(SIGINT, 2);
#endif
          snapshot_interval = interval / 1e3;
          time = caml_sys_time_unboxed(Val_unit);
          next_snapshot_time = time + snapshot_interval;
          atexit(&caml_spacetime_automatic_save);
        }
      }
    }
  }
}
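
/* Automatic snapshots are thus controlled entirely by the environment:
   OCAML_SPACETIME_INTERVAL gives the snapshot interval in milliseconds
   (note the division by 1e3 above) and OCAML_SPACETIME_SNAPSHOT_DIR
   optionally overrides the directory, which defaults to the working
   directory at startup.  For example (shell syntax, illustrative only):

     OCAML_SPACETIME_INTERVAL=1000 OCAML_SPACETIME_SNAPSHOT_DIR=/tmp ./prog
*/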

void caml_spacetime_register_shapes(void* dynlinked_table)
{
  shape_table* table;
  table = (shape_table*) malloc(sizeof(shape_table));
  if (table == NULL) {
    fprintf(stderr, "Out of memory whilst registering shape table\n");
    abort();
  }
  table->table = (uint64_t*) dynlinked_table;
  table->next = caml_spacetime_dynamic_shape_tables;
  caml_spacetime_dynamic_shape_tables = table;
}

CAMLprim value caml_spacetime_trie_is_initialized (value v_unit)
{
  return (caml_spacetime_trie_root == Val_unit) ? Val_false : Val_true;
}

CAMLprim value caml_spacetime_get_trie_root (value v_unit)
{
  return caml_spacetime_trie_root;
}

void caml_spacetime_register_thread(
  value* trie_node_root, value* finaliser_trie_node_root)
{
  per_thread* thr;

  thr = (per_thread*) malloc(sizeof(per_thread));
  if (thr == NULL) {
    fprintf(stderr, "Out of memory while registering thread for profiling\n");
    abort();
  }
  thr->next = per_threads;
  per_threads = thr;

  thr->trie_node_root = trie_node_root;
  thr->finaliser_trie_node_root = finaliser_trie_node_root;

  /* CR-soon mshinwell: record thread ID (and for the main thread too) */

  num_per_threads++;
}

static void caml_spacetime_save_event_internal (value v_time_opt,
                                                struct channel* chan,
                                                value v_event_name)
{
  value v_time;
  double time_override = 0.0;
  int use_time_override = 0;

  if (Is_block(v_time_opt)) {
    time_override = Double_field(Field(v_time_opt, 0), 0);
    use_time_override = 1;
  }
  v_time = caml_spacetime_timestamp(time_override, use_time_override);

  Lock(chan);
  caml_output_val(chan, Val_long(2), Val_long(0));
  caml_output_val(chan, v_event_name, Val_long(0));
  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, v_time, Val_long(0));
  caml_extern_allow_out_of_heap = 0;
  Unlock(chan);

  caml_stat_free(Hp_val(v_time));
}

CAMLprim value caml_spacetime_save_event (value v_time_opt,
                                          value v_channel,
                                          value v_event_name)
{
  struct channel* chan = Channel(v_channel);

  caml_spacetime_save_event_internal(v_time_opt, chan, v_event_name);

  return Val_unit;
}

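/* A snapshot file as written by this compilation unit is therefore a
   sequence of marshalled values: the magic number first, then records
   introduced by an integer tag---Val_long(2) for an event (followed by
   its name and timestamp, as above) and Val_long(1) for a trie dump
   (followed by the timestamp, frame table, shape table, trie count and
   the tries themselves, as written by [save_trie] below). */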

void save_trie (struct channel *chan, double time_override,
                int use_time_override)
{
  value v_time, v_frames, v_shapes;
  /* CR-someday mshinwell: The commented-out changes here are for multicore,
     where we think we should have one trie per domain. */
  /* int num_marshalled = 0;
  per_thread* thr = per_threads; */

  Lock(chan);

  caml_output_val(chan, Val_long(1), Val_long(0));

  v_time = caml_spacetime_timestamp(time_override, use_time_override);
  v_frames = caml_spacetime_frame_table();
  v_shapes = caml_spacetime_shape_table();

  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, v_time, Val_long(0));
  caml_output_val(chan, v_frames, Val_long(0));
  caml_output_val(chan, v_shapes, Val_long(0));
  caml_extern_allow_out_of_heap = 0;

  caml_output_val(chan, Val_long(1) /* Val_long(num_per_threads + 1) */,
    Val_long(0));

  /* Marshal both the main and finaliser tries, for all threads that have
     been created, to an [out_channel].  This can be done by using the
     extern.c code as usual, since the trie looks like standard OCaml values;
     but we must allow it to traverse outside the heap. */

  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, caml_spacetime_trie_root, Val_long(0));
  caml_output_val(chan,
    caml_spacetime_finaliser_trie_root_main_thread, Val_long(0));
  /* while (thr != NULL) {
    caml_output_val(chan, *(thr->trie_node_root), Val_long(0));
    caml_output_val(chan, *(thr->finaliser_trie_node_root),
      Val_long(0));
    thr = thr->next;
    num_marshalled++;
  }
  Assert(num_marshalled == num_per_threads); */
  caml_extern_allow_out_of_heap = 0;

  Unlock(chan);
}

CAMLprim value caml_spacetime_save_trie (value v_time_opt, value v_channel)
{
  struct channel* channel = Channel(v_channel);
  double time_override = 0.0;
  int use_time_override = 0;

  if (Is_block(v_time_opt)) {
    time_override = Double_field(Field(v_time_opt, 0), 0);
    use_time_override = 1;
  }

  save_trie(channel, time_override, use_time_override);

  return Val_unit;
}

c_node_type caml_spacetime_classify_c_node(c_node* node)
{
  return (node->pc & 2) ? CALL : ALLOCATION;
}
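
/* The two kinds of C node are distinguished by a tag bit within the
   encoded program counter: values built with [Encode_c_node_pc_for_call]
   have bit 1 set (hence the test against 2 above), while those built
   with [Encode_c_node_pc_for_alloc_point] do not.  A sketch, assuming
   that encoding:

     c_node* n = allocate_c_node();
     n->pc = Encode_c_node_pc_for_call(pc);
     Assert(caml_spacetime_classify_c_node(n) == CALL);
*/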

c_node* caml_spacetime_c_node_of_stored_pointer(value node_stored)
{
  Assert(node_stored == Val_unit || Is_c_node(node_stored));
  return (node_stored == Val_unit) ? NULL : (c_node*) Hp_val(node_stored);
}

c_node* caml_spacetime_c_node_of_stored_pointer_not_null(
      value node_stored)
{
  Assert(Is_c_node(node_stored));
  return (c_node*) Hp_val(node_stored);
}

value caml_spacetime_stored_pointer_of_c_node(c_node* c_node)
{
  value node;
  Assert(c_node != NULL);
  node = Val_hp(c_node);
  Assert(Is_c_node(node));
  return node;
}

#ifdef HAS_LIBUNWIND
static int pc_inside_c_node_matches(c_node* node, void* pc)
{
  return Decode_c_node_pc(node->pc) == pc;
}
#endif

static value allocate_uninitialized_ocaml_node(int size_including_header)
{
  void* node;
  uintnat size;

  Assert(size_including_header >= 3);

  size = size_including_header * sizeof(value);

  /* Allocate from the free node block rather than the C heap; all fields
     have already been initialised to [Val_unit] by
     [reinitialise_free_node_block]. */
  node = (void*) start_of_free_node_block;
  if (end_of_free_node_block - start_of_free_node_block < size) {
    reinitialise_free_node_block();
    node = (void*) start_of_free_node_block;
    Assert(end_of_free_node_block - start_of_free_node_block >= size);
  }

  start_of_free_node_block += size;

  /* We don't currently rely on [uintnat] alignment, but we do need some
     alignment, so just be sure. */
  Assert (((uintnat) node) % sizeof(uintnat) == 0);
  return Val_hp(node);
}

static value find_tail_node(value node, void* callee)
{
  /* Search the tail chain within [node] (which corresponds to an invocation
     of a caller of [callee]) to determine whether it contains a tail node
     corresponding to [callee].  Returns any such node, or [Val_unit] if no
     such node exists. */

  value starting_node;
  value pc;
  value found = Val_unit;

  starting_node = node;
  pc = Encode_node_pc(callee);

  do {
    Assert(Is_ocaml_node(node));
    if (Node_pc(node) == pc) {
      found = node;
    }
    else {
      node = Tail_link(node);
    }
  } while (found == Val_unit && starting_node != node);

  return found;
}
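
/* The tail chain is a circular singly-linked list threaded through the
   [Tail_link] field of each OCaml node: a node with no tail-call peers
   links to itself, and [caml_spacetime_allocate_node] below splices new
   nodes in just after the caller's node.  The loop above terminates
   because following [Tail_link] must eventually return to
   [starting_node].  Sketch of the invariant for a chain of three nodes
   a, b, c (illustrative):

     Tail_link(a) == b && Tail_link(b) == c && Tail_link(c) == a
*/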

CAMLprim value caml_spacetime_allocate_node(
      int size_including_header, void* pc, value* node_hole)
{
  value node;
  value caller_node = Val_unit;

  node = *node_hole;
  /* The node hole should either contain [Val_unit], indicating that this
     function was not tail called and we have not been to this point in the
     trie before; or it should contain a value encoded using
     [Encoded_tail_caller_node] that points at the node of a caller
     that tail called the current function.  (Such a value is necessary to
     be able to find the start of the caller's node, and hence its tail
     chain, so we as a tail-called callee can link ourselves in.) */
  Assert(Is_tail_caller_node_encoded(node));

  if (node != Val_unit) {
    value tail_node;
    /* The callee was tail called.  Find whether there already exists a node
       for it in the tail call chain within the caller's node.  The caller's
       node must always be an OCaml node. */
    caller_node = Decode_tail_caller_node(node);
    tail_node = find_tail_node(caller_node, pc);
    if (tail_node != Val_unit) {
      /* This tail calling sequence has happened before; just fill the hole
         with the existing node and return. */
      *node_hole = tail_node;
      return 0;  /* indicates an existing node was returned */
    }
  }

  node = allocate_uninitialized_ocaml_node(size_including_header);
  Hd_val(node) =
    Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
  Assert((((uintnat) pc) % 1) == 0);
  Node_pc(node) = Encode_node_pc(pc);
  /* If the callee was tail called, then the tail link field will link this
     new node into an existing tail chain.  Otherwise, it is initialized with
     the empty tail chain, i.e. the one pointing directly at [node]. */
  if (caller_node == Val_unit) {
    Tail_link(node) = node;
  }
  else {
    Tail_link(node) = Tail_link(caller_node);
    Tail_link(caller_node) = node;
  }

  /* The callee node pointers for direct tail call points are
     initialized from code emitted by the OCaml compiler.  This is done to
     avoid having to pass this function a description of which nodes are
     direct tail call points.  (We cannot just count them and put them at the
     beginning of the node because we need the indexes of elements within the
     node during instruction selection before we have found all call points.)

     All other fields have already been initialised by
     [reinitialise_free_node_block].
  */

  *node_hole = node;

  return 1;  /* indicates a new node was created */
}

static c_node* allocate_c_node(void)
{
  c_node* node;
  size_t index;

  node = (c_node*) start_of_free_node_block;
  if (end_of_free_node_block - start_of_free_node_block < sizeof(c_node)) {
    reinitialise_free_node_block();
    node = (c_node*) start_of_free_node_block;
    Assert(end_of_free_node_block - start_of_free_node_block
      >= sizeof(c_node));
  }
  start_of_free_node_block += sizeof(c_node);

  Assert((sizeof(c_node) % sizeof(uintnat)) == 0);

  /* CR-soon mshinwell: remove this and pad the structure properly */
  for (index = 0; index < sizeof(c_node) / sizeof(value); index++) {
    ((value*) node)[index] = Val_unit;
  }

  node->gc_header =
    Make_header(sizeof(c_node)/sizeof(uintnat) - 1, C_node_tag, Caml_black);
  node->data.callee_node = Val_unit;
  node->next = Val_unit;

  return node;
}

/* Since a given indirect call site either always yields tail calls or
   always yields non-tail calls, the output of
   [caml_spacetime_indirect_node_hole_ptr] is uniquely determined by its
   first two arguments (the callee and the node hole).  We cache these
   to increase performance of recursive functions containing an indirect
   call (e.g. [List.map] when not inlined). */
static void* last_indirect_node_hole_ptr_callee;
static value* last_indirect_node_hole_ptr_node_hole;
static value* last_indirect_node_hole_ptr_result;

CAMLprim value* caml_spacetime_indirect_node_hole_ptr
      (void* callee, value* node_hole, value caller_node)
{
  /* Find the address of the node hole for an indirect call to [callee].
     If [caller_node] is not [Val_unit], it is a pointer to the caller's
     node, and indicates that this is a tail call site. */

  c_node* c_node;
  value encoded_callee;

  if (callee == last_indirect_node_hole_ptr_callee
      && node_hole == last_indirect_node_hole_ptr_node_hole) {
    return last_indirect_node_hole_ptr_result;
  }

  last_indirect_node_hole_ptr_callee = callee;
  last_indirect_node_hole_ptr_node_hole = node_hole;

  encoded_callee = Encode_c_node_pc_for_call(callee);

  while (*node_hole != Val_unit) {
    Assert(((uintnat) *node_hole) % sizeof(value) == 0);

    c_node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);

    Assert(c_node != NULL);
    Assert(caml_spacetime_classify_c_node(c_node) == CALL);

    if (c_node->pc == encoded_callee) {
      last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);
      return last_indirect_node_hole_ptr_result;
    }
    else {
      node_hole = &c_node->next;
    }
  }

  c_node = allocate_c_node();
  c_node->pc = encoded_callee;

  if (caller_node != Val_unit) {
    /* This is a tail call site.
       Perform the initialization equivalent to that emitted by
       [Spacetime.code_for_function_prologue] for direct tail call
       sites. */
    c_node->data.callee_node = Encode_tail_caller_node(caller_node);
  }

  *node_hole = caml_spacetime_stored_pointer_of_c_node(c_node);

  Assert(((uintnat) *node_hole) % sizeof(value) == 0);
  Assert(*node_hole != Val_unit);

  last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);

  return last_indirect_node_hole_ptr_result;
}

/* Some notes on why caml_call_gc doesn't need a distinguished node.
   (Remember that thread switches are irrelevant here because each thread
   has its own trie.)

   caml_call_gc only invokes OCaml functions in the following circumstances:
   1. running an OCaml finaliser;
   2. executing an OCaml signal handler.
   Both of these are done on the finaliser trie.  Furthermore, both of
   these invocations start via caml_callback; the code in this file for
   handling that (caml_spacetime_c_to_ocaml) correctly copes with that by
   attaching a single "caml_start_program" node that can cope with any
   number of indirect OCaml calls from that point.

   caml_call_gc may also invoke C functions that cause allocation.  All of
   these (assuming libunwind support is present) will cause a chain of
   c_node structures to be attached to the trie, starting at the node hole
   passed to caml_call_gc from OCaml code.  These structures are extensible
   and can thus accommodate any number of C backtraces leading from
   caml_call_gc.
*/
/* CR-soon mshinwell: it might in fact be the case now that nothing called
   from caml_call_gc will do any allocation that ends up on the trie.  We
   can revisit this after the first release. */

static NOINLINE void* find_trie_node_from_libunwind(int for_allocation,
    uintnat wosize, struct ext_table** cached_frames)
{
#ifdef HAS_LIBUNWIND
  /* Given that [caml_last_return_address] is the most recent call site in
     OCaml code, and that we are now in C (or other) code called from that
     site, obtain a backtrace using libunwind and graft the most recent
     portion (everything back to but not including [caml_last_return_address])
     onto the trie.  See the important comment below regarding the fact that
     call site, and not callee, addresses are recorded during this process.

     If [for_allocation] is non-zero, the final node recorded will be for
     an allocation, and the returned pointer is to the allocation node.
     Otherwise, no node is recorded for the innermost frame, and the
     returned pointer is a pointer to the *node hole* where a node for that
     frame should be attached.

     If [for_allocation] is non-zero then [wosize] must give the size in
     words, excluding the header, of the value being allocated.

     If [cached_frames != NULL] then:
     1. If [*cached_frames] is NULL then save the captured backtrace in a
        newly-allocated table and store the pointer to that table in
        [*cached_frames];
     2. Otherwise use [*cached_frames] as the unwinding information.
     The intention is that when the context is known (e.g. a function such
     as [caml_make_vect] known to have been directly invoked from OCaml),
     we can avoid expensive calls to libunwind.
  */

  unw_cursor_t cur;
  unw_context_t ctx;
  int ret;
  int innermost_frame;
  int frame;
  static struct ext_table frames_local;
  struct ext_table* frames;
  static int ext_table_initialised = 0;
  int have_frames_already = 0;
  value* node_hole;
  c_node* node = NULL;
  int initial_table_size = 1000;
  int must_initialise_node_for_allocation = 0;

  if (!cached_frames) {
    if (!ext_table_initialised) {
      caml_ext_table_init(&frames_local, initial_table_size);
      ext_table_initialised = 1;
    }
    else {
      caml_ext_table_clear(&frames_local, 0);
    }
    frames = &frames_local;
  } else {
    if (*cached_frames) {
      frames = *cached_frames;
      have_frames_already = 1;
    }
    else {
      frames = (struct ext_table*) malloc(sizeof(struct ext_table));
      if (!frames) {
        caml_fatal_error("Not enough memory for ext_table allocation");
      }
      caml_ext_table_init(frames, initial_table_size);
      *cached_frames = frames;
    }
  }

  if (!have_frames_already) {
    /* Get the stack backtrace as far as [caml_last_return_address]. */

    ret = unw_getcontext(&ctx);
    if (ret != UNW_ESUCCESS) {
      return NULL;
    }

    ret = unw_init_local(&cur, &ctx);
    if (ret != UNW_ESUCCESS) {
      return NULL;
    }

    while ((ret = unw_step(&cur)) > 0) {
      unw_word_t ip;
      unw_get_reg(&cur, UNW_REG_IP, &ip);
      if (caml_last_return_address == (uintnat) ip) {
        break;
      }
      else {
        /* Inlined some of [caml_ext_table_add] for speed. */
        if (frames->size < frames->capacity) {
          frames->contents[frames->size++] = (void*) ip;
        } else {
          caml_ext_table_add(frames, (void*) ip);
        }
      }
    }
  }

  /* We always need to ignore the frames for:
      #0  find_trie_node_from_libunwind
      #1  caml_spacetime_c_to_ocaml
     Further, if this is not an allocation point, we should not create the
     node for the current C function that triggered us (i.e. frame #2). */
  innermost_frame = for_allocation ? 1 : 2;

  if (frames->size - 1 < innermost_frame) {
    /* Insufficiently many frames (maybe no frames) returned from
       libunwind; just don't do anything. */
    return NULL;
  }

  node_hole = caml_spacetime_trie_node_ptr;
  /* Note that if [node_hole] is filled, then it must point to a C node,
     since it is not possible for there to be a call point in an OCaml
     function that sometimes calls C and sometimes calls OCaml. */

  for (frame = frames->size - 1; frame >= innermost_frame; frame--) {
    c_node_type expected_type;
    void* pc = frames->contents[frame];
    Assert (pc != (void*) caml_last_return_address);

    if (!for_allocation) {
      expected_type = CALL;
    }
    else {
      expected_type = (frame > innermost_frame ? CALL : ALLOCATION);
    }

    if (*node_hole == Val_unit) {
      node = allocate_c_node();
      /* Note: for CALL nodes, the PC is the program counter at each call
         site.  We do not store program counter addresses of the start of
         callees, unlike for OCaml nodes.  This means that some trie nodes
         will become conflated.  These can be split during post-processing by
         working out which function each call site was in. */
      node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
        : Encode_c_node_pc_for_alloc_point(pc));
      *node_hole = caml_spacetime_stored_pointer_of_c_node(node);
      if (expected_type == ALLOCATION) {
        must_initialise_node_for_allocation = 1;
      }
    }
    else {
      c_node* prev;
      int found = 0;

      node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
      Assert(node != NULL);
      Assert(node->next == Val_unit
        || (((uintnat) (node->next)) % sizeof(value) == 0));

      prev = NULL;

      while (!found && node != NULL) {
        if (caml_spacetime_classify_c_node(node) == expected_type
            && pc_inside_c_node_matches(node, pc)) {
          found = 1;
        }
        else {
          prev = node;
          node = caml_spacetime_c_node_of_stored_pointer(node->next);
        }
      }
      if (!found) {
        Assert(prev != NULL);
        node = allocate_c_node();
        node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
          : Encode_c_node_pc_for_alloc_point(pc));
        if (expected_type == ALLOCATION) {
          must_initialise_node_for_allocation = 1;
        }
        prev->next = caml_spacetime_stored_pointer_of_c_node(node);
      }
    }

    Assert(node != NULL);

    Assert(caml_spacetime_classify_c_node(node) == expected_type);
    Assert(pc_inside_c_node_matches(node, pc));
    node_hole = &node->data.callee_node;
  }

  if (must_initialise_node_for_allocation) {
    caml_spacetime_profinfo++;
    if (caml_spacetime_profinfo > PROFINFO_MASK) {
      /* Profiling counter overflow. */
      caml_spacetime_profinfo = PROFINFO_MASK;
    }
    node->data.allocation.profinfo =
      Make_header_with_profinfo(
        /* "-1" because [c_node] has the GC header as its first
           element. */
        offsetof(c_node, data.allocation.count)/sizeof(value) - 1,
        Infix_tag,
        Caml_black,
        caml_spacetime_profinfo);
    node->data.allocation.count = Val_long(0);

    /* Add the new allocation point into the linked list of all allocation
       points. */
    if (caml_all_allocation_points != NULL) {
      node->data.allocation.next =
        (value) &caml_all_allocation_points->count;
    } else {
      node->data.allocation.next = Val_unit;
    }
    caml_all_allocation_points = &node->data.allocation;
  }

  if (for_allocation) {
    Assert(caml_spacetime_classify_c_node(node) == ALLOCATION);
    Assert(caml_spacetime_c_node_of_stored_pointer(node->next) != node);
    Assert(Profinfo_hd(node->data.allocation.profinfo) > 0);
    node->data.allocation.count =
      Val_long(Long_val(node->data.allocation.count) + (1 + wosize));
  }

  Assert(node->next != (value) NULL);

  return for_allocation ? (void*) node : (void*) node_hole;
#else
  return NULL;
#endif
}

void caml_spacetime_c_to_ocaml(void* ocaml_entry_point,
      void* identifying_pc_for_caml_start_program)
{
  /* Called in [caml_start_program] and [caml_callback*] when we are about
     to cross from C into OCaml.  [ocaml_entry_point] is the branch target.
     This situation is handled by ensuring the presence of a new OCaml node
     for the callback veneer; the node contains a single indirect call point
     which accumulates the [ocaml_entry_point]s.

     The layout of the node is described in the "system shape table"; see
     asmrun/amd64.S.
  */

  value node;

  /* Update the trie with the current backtrace, as far back as
     [caml_last_return_address], and leave the node hole pointer at
     the correct place for attachment of a [caml_start_program] node. */

#ifdef HAS_LIBUNWIND
  value* node_temp;
  node_temp = (value*) find_trie_node_from_libunwind(0, 0, NULL);
  if (node_temp != NULL) {
    caml_spacetime_trie_node_ptr = node_temp;
  }
#endif

  if (*caml_spacetime_trie_node_ptr == Val_unit) {
    uintnat size_including_header;

    size_including_header =
      1 /* GC header */ + Node_num_header_words + Indirect_num_fields;

    node = allocate_uninitialized_ocaml_node(size_including_header);
    Hd_val(node) =
      Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
    Assert((((uintnat) identifying_pc_for_caml_start_program) % 1) == 0);
    Node_pc(node) = Encode_node_pc(identifying_pc_for_caml_start_program);
    Tail_link(node) = node;
    Indirect_pc_linked_list(node, Node_num_header_words) = Val_unit;
    *caml_spacetime_trie_node_ptr = node;
  }
  else {
    node = *caml_spacetime_trie_node_ptr;
    /* If there is a node here already, it should never be an initialized
       (but as yet unused) tail call point, since calls from OCaml into C
       are never tail calls (and no C -> C call is marked as tail). */
    Assert(!Is_tail_caller_node_encoded(node));
  }

  Assert(Is_ocaml_node(node));
  Assert(Decode_node_pc(Node_pc(node))
    == identifying_pc_for_caml_start_program);
  Assert(Tail_link(node) == node);
  Assert(Wosize_val(node) == Node_num_header_words + Indirect_num_fields);

  /* Search the node to find the node hole corresponding to the indirect
     call to the OCaml function. */
  caml_spacetime_trie_node_ptr =
    caml_spacetime_indirect_node_hole_ptr(
      ocaml_entry_point,
      &Indirect_pc_linked_list(node, Node_num_header_words),
      Val_unit);
  Assert(*caml_spacetime_trie_node_ptr == Val_unit
    || Is_ocaml_node(*caml_spacetime_trie_node_ptr));
}

extern void caml_garbage_collection(void);  /* signals_asm.c */
extern void caml_array_bound_error(void);  /* fail.c */

CAMLprim uintnat caml_spacetime_generate_profinfo (void* profinfo_words,
                                                   uintnat index_within_node)
{
  /* Called from code that creates a value's header inside an OCaml
     function. */

  value node;
  uintnat profinfo;

  caml_spacetime_profinfo++;
  if (caml_spacetime_profinfo > PROFINFO_MASK) {
    /* Profiling counter overflow. */
    caml_spacetime_profinfo = PROFINFO_MASK;
  }
  profinfo = caml_spacetime_profinfo;

  /* CR-someday mshinwell: we could always use the [struct allocation_point]
     overlay instead of the macros now. */

  /* [node] isn't really a node; it points into the middle of
     one---specifically to the "profinfo" word of an allocation point.
     It's done like this to avoid re-calculating the place in the node
     (which already has to be done in the OCaml-generated code run before
     this function). */
  node = (value) profinfo_words;
  Assert(Alloc_point_profinfo(node, 0) == Val_unit);

  /* The profinfo value is stored shifted to reduce the number of
     instructions required on the OCaml side.  It also enables us to use
     [Infix_tag] to obtain valid value pointers into the middle of nodes,
     which is used for the linked list of all allocation points. */
  profinfo = Make_header_with_profinfo(
    index_within_node, Infix_tag, Caml_black, profinfo);

  Assert(!Is_block(profinfo));
  Alloc_point_profinfo(node, 0) = profinfo;
  /* The count is set to zero by the initialisation when the node was
     created (see above). */
  Assert(Alloc_point_count(node, 0) == Val_long(0));

  /* Add the new allocation point into the linked list of all allocation
     points. */
  if (caml_all_allocation_points != NULL) {
    Alloc_point_next_ptr(node, 0) = (value) &caml_all_allocation_points->count;
  }
  else {
    Assert(Alloc_point_next_ptr(node, 0) == Val_unit);
  }
  caml_all_allocation_points = (allocation_point*) node;

  return profinfo;
}

uintnat caml_spacetime_my_profinfo (struct ext_table** cached_frames,
                                    uintnat wosize)
{
  /* Return the profinfo value that should be written into a value's header
     during an allocation from C.  This may necessitate extending the trie
     with information obtained from libunwind. */

  c_node* node;
  uintnat profinfo = 0;

  node = find_trie_node_from_libunwind(1, wosize, cached_frames);
  if (node != NULL) {
    profinfo = ((uintnat) (node->data.allocation.profinfo)) >> PROFINFO_SHIFT;
  }

  return profinfo;  /* N.B. not shifted by PROFINFO_SHIFT */
}
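
/* Profinfo values occupy the high-order bits of a block header:
   [Make_header_with_profinfo] installs the counter above PROFINFO_SHIFT
   (which is why the extraction above shifts right by the same amount),
   and the counter saturates at PROFINFO_MASK rather than wrapping.  A
   sketch of the round trip, assuming those macros ([wosize] and [tag]
   are placeholders):

     uintnat profinfo = caml_spacetime_my_profinfo(NULL, wosize);
     header_t hd =
       Make_header_with_profinfo(wosize, tag, Caml_black, profinfo);
     Assert(Profinfo_hd(hd) == profinfo);
*/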

void caml_spacetime_automatic_snapshot (void)
{
  if (automatic_snapshots) {
    double start_time, end_time;
    start_time = caml_sys_time_unboxed(Val_unit);
    if (start_time >= next_snapshot_time) {
      maybe_reopen_snapshot_channel();
      caml_spacetime_save_snapshot(snapshot_channel, 0.0, 0);
      end_time = caml_sys_time_unboxed(Val_unit);
      next_snapshot_time = end_time + snapshot_interval;
    }
  }
}

CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
  (value v_event_name)
{
  if (automatic_snapshots) {
    maybe_reopen_snapshot_channel();
    caml_spacetime_save_event_internal (Val_unit, snapshot_channel,
                                        v_event_name);
  }
  return Val_unit;
}

void caml_spacetime_automatic_save (void)
{
  /* Called from [atexit]. */

  if (automatic_snapshots) {
    automatic_snapshots = 0;
    maybe_reopen_snapshot_channel();
    save_trie(snapshot_channel, 0.0, 0);
    caml_flush(snapshot_channel);
    caml_close_channel(snapshot_channel);
  }
}

CAMLprim value caml_spacetime_enabled (value v_unit)
{
  return Val_true;
}

CAMLprim value caml_register_channel_for_spacetime (value v_channel)
{
  struct channel* channel = Channel(v_channel);
  channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
  return Val_unit;
}

#else

/* Functions for when the compiler was not configured with "-spacetime". */

CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_enabled (value v_unit)
{
  return Val_false;
}

CAMLprim value caml_spacetime_save_event (value v_time_opt,
                                          value v_channel,
                                          value v_event_name)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
  (value v_event_name)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_save_trie (value v_time_opt, value v_channel)
{
  return Val_unit;
}

CAMLprim value caml_register_channel_for_spacetime (value v_channel)
{
  return Val_unit;
}

#endif