1 /**************************************************************************/
2 /* */
3 /* OCaml */
4 /* */
5 /* Mark Shinwell and Leo White, Jane Street Europe */
6 /* */
7 /* Copyright 2013--2016, Jane Street Group, LLC */
8 /* */
9 /* All rights reserved. This file is distributed under the terms of */
10 /* the GNU Lesser General Public License version 2.1, with the */
11 /* special exception on linking described in the file LICENSE. */
12 /* */
13 /**************************************************************************/
14
15 #define CAML_INTERNALS
16
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <math.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #include <signal.h>
26 #include "caml/config.h"
27 #ifdef HAS_UNISTD
28 #include <unistd.h>
29 #endif
30
31 #include "caml/alloc.h"
32 #include "caml/backtrace_prim.h"
33 #include "caml/fail.h"
34 #include "caml/gc.h"
35 #include "caml/intext.h"
36 #include "caml/major_gc.h"
37 #include "caml/memory.h"
38 #include "caml/minor_gc.h"
39 #include "caml/misc.h"
40 #include "caml/mlvalues.h"
41 #include "caml/osdeps.h"
42 #include "caml/roots.h"
43 #include "caml/signals.h"
44 #include "caml/stack.h"
45 #include "caml/sys.h"
46 #include "caml/spacetime.h"
47
48 #ifdef WITH_SPACETIME
49
50 /* We force "noinline" in certain places to be sure we know how many
51 frames there will be on the stack. */
52 #define NOINLINE __attribute__((noinline))
53
54 #ifdef HAS_LIBUNWIND
55 #define UNW_LOCAL_ONLY
56 #include "libunwind.h"
57 #endif
58
59 static int automatic_snapshots = 0;
60 static double snapshot_interval = 0.0;
61 static double next_snapshot_time = 0.0;
62 static struct channel *snapshot_channel;
63 static int pid_when_snapshot_channel_opened;
64
65 extern value caml_spacetime_debug(value);
66
67 static char* start_of_free_node_block;
68 static char* end_of_free_node_block;
69
70 typedef struct per_thread {
71 value* trie_node_root;
72 value* finaliser_trie_node_root;
73 struct per_thread* next;
74 } per_thread;
75
76 /* List of tries corresponding to threads that have been created. */
77 /* CR-soon mshinwell: just include the main trie in this list. */
78 static per_thread* per_threads = NULL;
79 static int num_per_threads = 0;
80
81 /* [caml_spacetime_shapes] is defined in the startup file. */
82 extern uint64_t* caml_spacetime_shapes;
83
84 uint64_t** caml_spacetime_static_shape_tables = NULL;
85 shape_table* caml_spacetime_dynamic_shape_tables = NULL;
86
87 static uintnat caml_spacetime_profinfo = (uintnat) 0;
88
89 value caml_spacetime_trie_root = Val_unit;
90 value* caml_spacetime_trie_node_ptr = &caml_spacetime_trie_root;
91
92 static value caml_spacetime_finaliser_trie_root_main_thread = Val_unit;
93 value* caml_spacetime_finaliser_trie_root
94 = &caml_spacetime_finaliser_trie_root_main_thread;
95
96 /* CR-someday mshinwell: think about thread safety of the manipulation of
97 this list for multicore */
98 allocation_point* caml_all_allocation_points = NULL;
99
100 static const uintnat chunk_size = 1024 * 1024;
101
reinitialise_free_node_block(void)102 static void reinitialise_free_node_block(void)
103 {
104 size_t index;
105
106 start_of_free_node_block = (char*) malloc(chunk_size);
107 end_of_free_node_block = start_of_free_node_block + chunk_size;
108
109 for (index = 0; index < chunk_size / sizeof(value); index++) {
110 ((value*) start_of_free_node_block)[index] = Val_unit;
111 }
112 }
113
114 #ifndef O_BINARY
115 #define O_BINARY 0
116 #endif
117
118 #if defined (_WIN32) || defined (_WIN64)
119 extern value val_process_id;
120 #endif
121
122 static uint32_t version_number = 0;
123 static uint32_t magic_number_base = 0xace00ace;
124
caml_spacetime_write_magic_number_internal(struct channel * chan)125 static void caml_spacetime_write_magic_number_internal(struct channel* chan)
126 {
127 value magic_number =
128 Val_long(((uint64_t) magic_number_base)
129 | (((uint64_t) version_number) << 32));
130
131 Lock(chan);
132 caml_output_val(chan, magic_number, Val_long(0));
133 Unlock(chan);
134 }
135
caml_spacetime_write_magic_number(value v_channel)136 CAMLprim value caml_spacetime_write_magic_number(value v_channel)
137 {
138 caml_spacetime_write_magic_number_internal(Channel(v_channel));
139 return Val_unit;
140 }
141
142 static char* automatic_snapshot_dir;
143
open_snapshot_channel(void)144 static void open_snapshot_channel(void)
145 {
146 int fd;
147 char filename[8192];
148 int pid;
149 #if defined (_WIN32) || defined (_WIN64)
150 pid = Int_val(val_process_id);
151 #else
152 pid = getpid();
153 #endif
154 snprintf(filename, 8192, "%s/spacetime-%d", automatic_snapshot_dir, pid);
155 filename[8191] = '\0';
156 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
157 if (fd == -1) {
158 automatic_snapshots = 0;
159 }
160 else {
161 snapshot_channel = caml_open_descriptor_out(fd);
162 snapshot_channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
163 pid_when_snapshot_channel_opened = pid;
164 caml_spacetime_write_magic_number_internal(snapshot_channel);
165 }
166 }
167
maybe_reopen_snapshot_channel(void)168 static void maybe_reopen_snapshot_channel(void)
169 {
170 /* This function should be used before writing to the automatic snapshot
171 channel. It detects whether we have forked since the channel was opened.
172 If so, we close the old channel (ignoring any errors just in case the
173 old fd has been closed, e.g. in a double-fork situation where the middle
174 process has a loop to manually close all fds and no Spacetime snapshot
175 was written during that time) and then open a new one. */
176
177 int pid;
178 #if defined (_WIN32) || defined (_WIN64)
179 pid = Int_val(val_process_id);
180 #else
181 pid = getpid();
182 #endif
183
184 if (pid != pid_when_snapshot_channel_opened) {
185 caml_close_channel(snapshot_channel);
186 open_snapshot_channel();
187 }
188 }
189
190 extern void caml_spacetime_automatic_save(void);
191
caml_spacetime_initialize(void)192 void caml_spacetime_initialize(void)
193 {
194 /* Note that this is called very early (even prior to GC initialisation). */
195
196 char *ap_interval;
197
198 reinitialise_free_node_block();
199
200 caml_spacetime_static_shape_tables = &caml_spacetime_shapes;
201
202 ap_interval = caml_secure_getenv ("OCAML_SPACETIME_INTERVAL");
203 if (ap_interval != NULL) {
204 unsigned int interval = 0;
205 sscanf(ap_interval, "%u", &interval);
206 if (interval != 0) {
207 double time;
208 char cwd[4096];
209 char* user_specified_automatic_snapshot_dir;
210 int dir_ok = 1;
211
212 user_specified_automatic_snapshot_dir =
213 caml_secure_getenv("OCAML_SPACETIME_SNAPSHOT_DIR");
214
215 if (user_specified_automatic_snapshot_dir == NULL) {
216 #ifdef HAS_GETCWD
217 if (getcwd(cwd, sizeof(cwd)) == NULL) {
218 dir_ok = 0;
219 }
220 #else
221 if (getwd(cwd) == NULL) {
222 dir_ok = 0;
223 }
224 #endif
225 if (dir_ok) {
226 automatic_snapshot_dir = strdup(cwd);
227 }
228 }
229 else {
230 automatic_snapshot_dir =
231 strdup(user_specified_automatic_snapshot_dir);
232 }
233
234 if (dir_ok) {
235 automatic_snapshots = 1;
236 open_snapshot_channel();
237 if (automatic_snapshots) {
238 #ifdef SIGINT
239 /* Catch interrupt so that the profile can be completed.
240 We do this by marking the signal as handled without
241 specifying an actual handler. This causes the signal
242 to be handled by a call to exit. */
243 caml_set_signal_action(SIGINT, 2);
244 #endif
245 snapshot_interval = interval / 1e3;
246 time = caml_sys_time_unboxed(Val_unit);
247 next_snapshot_time = time + snapshot_interval;
248 atexit(&caml_spacetime_automatic_save);
249 }
250 }
251 }
252 }
253 }
254
caml_spacetime_register_shapes(void * dynlinked_table)255 void caml_spacetime_register_shapes(void* dynlinked_table)
256 {
257 shape_table* table;
258 table = (shape_table*) malloc(sizeof(shape_table));
259 if (table == NULL) {
260 fprintf(stderr, "Out of memory whilst registering shape table");
261 abort();
262 }
263 table->table = (uint64_t*) dynlinked_table;
264 table->next = caml_spacetime_dynamic_shape_tables;
265 caml_spacetime_dynamic_shape_tables = table;
266 }
267
caml_spacetime_trie_is_initialized(value v_unit)268 CAMLprim value caml_spacetime_trie_is_initialized (value v_unit)
269 {
270 return (caml_spacetime_trie_root == Val_unit) ? Val_false : Val_true;
271 }
272
caml_spacetime_get_trie_root(value v_unit)273 CAMLprim value caml_spacetime_get_trie_root (value v_unit)
274 {
275 return caml_spacetime_trie_root;
276 }
277
caml_spacetime_register_thread(value * trie_node_root,value * finaliser_trie_node_root)278 void caml_spacetime_register_thread(
279 value* trie_node_root, value* finaliser_trie_node_root)
280 {
281 per_thread* thr;
282
283 thr = (per_thread*) malloc(sizeof(per_thread));
284 if (thr == NULL) {
285 fprintf(stderr, "Out of memory while registering thread for profiling\n");
286 abort();
287 }
288 thr->next = per_threads;
289 per_threads = thr;
290
291 thr->trie_node_root = trie_node_root;
292 thr->finaliser_trie_node_root = finaliser_trie_node_root;
293
294 /* CR-soon mshinwell: record thread ID (and for the main thread too) */
295
296 num_per_threads++;
297 }
298
caml_spacetime_save_event_internal(value v_time_opt,struct channel * chan,value v_event_name)299 static void caml_spacetime_save_event_internal (value v_time_opt,
300 struct channel* chan,
301 value v_event_name)
302 {
303 value v_time;
304 double time_override = 0.0;
305 int use_time_override = 0;
306
307 if (Is_block(v_time_opt)) {
308 time_override = Double_field(Field(v_time_opt, 0), 0);
309 use_time_override = 1;
310 }
311 v_time = caml_spacetime_timestamp(time_override, use_time_override);
312
313 Lock(chan);
314 caml_output_val(chan, Val_long(2), Val_long(0));
315 caml_output_val(chan, v_event_name, Val_long(0));
316 caml_extern_allow_out_of_heap = 1;
317 caml_output_val(chan, v_time, Val_long(0));
318 caml_extern_allow_out_of_heap = 0;
319 Unlock(chan);
320
321 caml_stat_free(Hp_val(v_time));
322 }
323
caml_spacetime_save_event(value v_time_opt,value v_channel,value v_event_name)324 CAMLprim value caml_spacetime_save_event (value v_time_opt,
325 value v_channel,
326 value v_event_name)
327 {
328 struct channel* chan = Channel(v_channel);
329
330 caml_spacetime_save_event_internal(v_time_opt, chan, v_event_name);
331
332 return Val_unit;
333 }
334
335
save_trie(struct channel * chan,double time_override,int use_time_override)336 void save_trie (struct channel *chan, double time_override,
337 int use_time_override)
338 {
339 value v_time, v_frames, v_shapes;
340 /* CR-someday mshinwell: The commented-out changes here are for multicore,
341 where we think we should have one trie per domain. */
342 /* int num_marshalled = 0;
343 per_thread* thr = per_threads; */
344
345 Lock(chan);
346
347 caml_output_val(chan, Val_long(1), Val_long(0));
348
349 v_time = caml_spacetime_timestamp(time_override, use_time_override);
350 v_frames = caml_spacetime_frame_table();
351 v_shapes = caml_spacetime_shape_table();
352
353 caml_extern_allow_out_of_heap = 1;
354 caml_output_val(chan, v_time, Val_long(0));
355 caml_output_val(chan, v_frames, Val_long(0));
356 caml_output_val(chan, v_shapes, Val_long(0));
357 caml_extern_allow_out_of_heap = 0;
358
359 caml_output_val(chan, Val_long(1) /* Val_long(num_per_threads + 1) */,
360 Val_long(0));
361
362 /* Marshal both the main and finaliser tries, for all threads that have
363 been created, to an [out_channel]. This can be done by using the
364 extern.c code as usual, since the trie looks like standard OCaml values;
365 but we must allow it to traverse outside the heap. */
366
367 caml_extern_allow_out_of_heap = 1;
368 caml_output_val(chan, caml_spacetime_trie_root, Val_long(0));
369 caml_output_val(chan,
370 caml_spacetime_finaliser_trie_root_main_thread, Val_long(0));
371 /* while (thr != NULL) {
372 caml_output_val(chan, *(thr->trie_node_root), Val_long(0));
373 caml_output_val(chan, *(thr->finaliser_trie_node_root),
374 Val_long(0));
375 thr = thr->next;
376 num_marshalled++;
377 }
378 Assert(num_marshalled == num_per_threads); */
379 caml_extern_allow_out_of_heap = 0;
380
381 Unlock(chan);
382 }
383
caml_spacetime_save_trie(value v_time_opt,value v_channel)384 CAMLprim value caml_spacetime_save_trie (value v_time_opt, value v_channel)
385 {
386 struct channel* channel = Channel(v_channel);
387 double time_override = 0.0;
388 int use_time_override = 0;
389
390 if (Is_block(v_time_opt)) {
391 time_override = Double_field(Field(v_time_opt, 0), 0);
392 use_time_override = 1;
393 }
394
395 save_trie(channel, time_override, use_time_override);
396
397 return Val_unit;
398 }
399
caml_spacetime_classify_c_node(c_node * node)400 c_node_type caml_spacetime_classify_c_node(c_node* node)
401 {
402 return (node->pc & 2) ? CALL : ALLOCATION;
403 }
404
caml_spacetime_c_node_of_stored_pointer(value node_stored)405 c_node* caml_spacetime_c_node_of_stored_pointer(value node_stored)
406 {
407 Assert(node_stored == Val_unit || Is_c_node(node_stored));
408 return (node_stored == Val_unit) ? NULL : (c_node*) Hp_val(node_stored);
409 }
410
caml_spacetime_c_node_of_stored_pointer_not_null(value node_stored)411 c_node* caml_spacetime_c_node_of_stored_pointer_not_null(
412 value node_stored)
413 {
414 Assert(Is_c_node(node_stored));
415 return (c_node*) Hp_val(node_stored);
416 }
417
caml_spacetime_stored_pointer_of_c_node(c_node * c_node)418 value caml_spacetime_stored_pointer_of_c_node(c_node* c_node)
419 {
420 value node;
421 Assert(c_node != NULL);
422 node = Val_hp(c_node);
423 Assert(Is_c_node(node));
424 return node;
425 }
426
427 #ifdef HAS_LIBUNWIND
pc_inside_c_node_matches(c_node * node,void * pc)428 static int pc_inside_c_node_matches(c_node* node, void* pc)
429 {
430 return Decode_c_node_pc(node->pc) == pc;
431 }
432 #endif
433
allocate_uninitialized_ocaml_node(int size_including_header)434 static value allocate_uninitialized_ocaml_node(int size_including_header)
435 {
436 void* node;
437 uintnat size;
438
439 Assert(size_including_header >= 3);
440 node = caml_stat_alloc(sizeof(uintnat) * size_including_header);
441
442 size = size_including_header * sizeof(value);
443
444 node = (void*) start_of_free_node_block;
445 if (end_of_free_node_block - start_of_free_node_block < size) {
446 reinitialise_free_node_block();
447 node = (void*) start_of_free_node_block;
448 Assert(end_of_free_node_block - start_of_free_node_block >= size);
449 }
450
451 start_of_free_node_block += size;
452
453 /* We don't currently rely on [uintnat] alignment, but we do need some
454 alignment, so just be sure. */
455 Assert (((uintnat) node) % sizeof(uintnat) == 0);
456 return Val_hp(node);
457 }
458
find_tail_node(value node,void * callee)459 static value find_tail_node(value node, void* callee)
460 {
461 /* Search the tail chain within [node] (which corresponds to an invocation
462 of a caller of [callee]) to determine whether it contains a tail node
463 corresponding to [callee]. Returns any such node, or [Val_unit] if no
464 such node exists. */
465
466 value starting_node;
467 value pc;
468 value found = Val_unit;
469
470 starting_node = node;
471 pc = Encode_node_pc(callee);
472
473 do {
474 Assert(Is_ocaml_node(node));
475 if (Node_pc(node) == pc) {
476 found = node;
477 }
478 else {
479 node = Tail_link(node);
480 }
481 } while (found == Val_unit && starting_node != node);
482
483 return found;
484 }
485
caml_spacetime_allocate_node(int size_including_header,void * pc,value * node_hole)486 CAMLprim value caml_spacetime_allocate_node(
487 int size_including_header, void* pc, value* node_hole)
488 {
489 value node;
490 value caller_node = Val_unit;
491
492 node = *node_hole;
493 /* The node hole should either contain [Val_unit], indicating that this
494 function was not tail called and we have not been to this point in the
495 trie before; or it should contain a value encoded using
496 [Encoded_tail_caller_node] that points at the node of a caller
497 that tail called the current function. (Such a value is necessary to
498 be able to find the start of the caller's node, and hence its tail
499 chain, so we as a tail-called callee can link ourselves in.) */
500 Assert(Is_tail_caller_node_encoded(node));
501
502 if (node != Val_unit) {
503 value tail_node;
504 /* The callee was tail called. Find whether there already exists a node
505 for it in the tail call chain within the caller's node. The caller's
506 node must always be an OCaml node. */
507 caller_node = Decode_tail_caller_node(node);
508 tail_node = find_tail_node(caller_node, pc);
509 if (tail_node != Val_unit) {
510 /* This tail calling sequence has happened before; just fill the hole
511 with the existing node and return. */
512 *node_hole = tail_node;
513 return 0; /* indicates an existing node was returned */
514 }
515 }
516
517 node = allocate_uninitialized_ocaml_node(size_including_header);
518 Hd_val(node) =
519 Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
520 Assert((((uintnat) pc) % 1) == 0);
521 Node_pc(node) = Encode_node_pc(pc);
522 /* If the callee was tail called, then the tail link field will link this
523 new node into an existing tail chain. Otherwise, it is initialized with
524 the empty tail chain, i.e. the one pointing directly at [node]. */
525 if (caller_node == Val_unit) {
526 Tail_link(node) = node;
527 }
528 else {
529 Tail_link(node) = Tail_link(caller_node);
530 Tail_link(caller_node) = node;
531 }
532
533 /* The callee node pointers for direct tail call points are
534 initialized from code emitted by the OCaml compiler. This is done to
535 avoid having to pass this function a description of which nodes are
536 direct tail call points. (We cannot just count them and put them at the
537 beginning of the node because we need the indexes of elements within the
538 node during instruction selection before we have found all call points.)
539
540 All other fields have already been initialised by
541 [reinitialise_free_node_block].
542 */
543
544 *node_hole = node;
545
546 return 1; /* indicates a new node was created */
547 }
548
allocate_c_node(void)549 static c_node* allocate_c_node(void)
550 {
551 c_node* node;
552 size_t index;
553
554 node = (c_node*) start_of_free_node_block;
555 if (end_of_free_node_block - start_of_free_node_block < sizeof(c_node)) {
556 reinitialise_free_node_block();
557 node = (c_node*) start_of_free_node_block;
558 Assert(end_of_free_node_block - start_of_free_node_block
559 >= sizeof(c_node));
560 }
561 start_of_free_node_block += sizeof(c_node);
562
563 Assert((sizeof(c_node) % sizeof(uintnat)) == 0);
564
565 /* CR-soon mshinwell: remove this and pad the structure properly */
566 for (index = 0; index < sizeof(c_node) / sizeof(value); index++) {
567 ((value*) node)[index] = Val_unit;
568 }
569
570 node->gc_header =
571 Make_header(sizeof(c_node)/sizeof(uintnat) - 1, C_node_tag, Caml_black);
572 node->data.callee_node = Val_unit;
573 node->next = Val_unit;
574
575 return node;
576 }
577
578 /* Since a given indirect call site either always yields tail calls or
579 always yields non-tail calls, the output of
580 [caml_spacetime_indirect_node_hole_ptr] is uniquely determined by its
581 first two arguments (the callee and the node hole). We cache these
582 to increase performance of recursive functions containing an indirect
583 call (e.g. [List.map] when not inlined). */
584 static void* last_indirect_node_hole_ptr_callee;
585 static value* last_indirect_node_hole_ptr_node_hole;
586 static value* last_indirect_node_hole_ptr_result;
587
caml_spacetime_indirect_node_hole_ptr(void * callee,value * node_hole,value caller_node)588 CAMLprim value* caml_spacetime_indirect_node_hole_ptr
589 (void* callee, value* node_hole, value caller_node)
590 {
591 /* Find the address of the node hole for an indirect call to [callee].
592 If [caller_node] is not [Val_unit], it is a pointer to the caller's
593 node, and indicates that this is a tail call site. */
594
595 c_node* c_node;
596 value encoded_callee;
597
598 if (callee == last_indirect_node_hole_ptr_callee
599 && node_hole == last_indirect_node_hole_ptr_node_hole) {
600 return last_indirect_node_hole_ptr_result;
601 }
602
603 last_indirect_node_hole_ptr_callee = callee;
604 last_indirect_node_hole_ptr_node_hole = node_hole;
605
606 encoded_callee = Encode_c_node_pc_for_call(callee);
607
608 while (*node_hole != Val_unit) {
609 Assert(((uintnat) *node_hole) % sizeof(value) == 0);
610
611 c_node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
612
613 Assert(c_node != NULL);
614 Assert(caml_spacetime_classify_c_node(c_node) == CALL);
615
616 if (c_node->pc == encoded_callee) {
617 last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);
618 return last_indirect_node_hole_ptr_result;
619 }
620 else {
621 node_hole = &c_node->next;
622 }
623 }
624
625 c_node = allocate_c_node();
626 c_node->pc = encoded_callee;
627
628 if (caller_node != Val_unit) {
629 /* This is a tail call site.
630 Perform the initialization equivalent to that emitted by
631 [Spacetime.code_for_function_prologue] for direct tail call
632 sites. */
633 c_node->data.callee_node = Encode_tail_caller_node(caller_node);
634 }
635
636 *node_hole = caml_spacetime_stored_pointer_of_c_node(c_node);
637
638 Assert(((uintnat) *node_hole) % sizeof(value) == 0);
639 Assert(*node_hole != Val_unit);
640
641 last_indirect_node_hole_ptr_result = &(c_node->data.callee_node);
642
643 return last_indirect_node_hole_ptr_result;
644 }
645
646 /* Some notes on why caml_call_gc doesn't need a distinguished node.
647 (Remember that thread switches are irrelevant here because each thread
648 has its own trie.)
649
650 caml_call_gc only invokes OCaml functions in the following circumstances:
651 1. running an OCaml finaliser;
652 2. executing an OCaml signal handler.
653 Both of these are done on the finaliser trie. Furthermore, both of
654 these invocations start via caml_callback; the code in this file for
655 handling that (caml_spacetime_c_to_ocaml) correctly copes with that by
656 attaching a single "caml_start_program" node that can cope with any
657 number of indirect OCaml calls from that point.
658
659 caml_call_gc may also invoke C functions that cause allocation. All of
660 these (assuming libunwind support is present) will cause a chain of
661 c_node structures to be attached to the trie, starting at the node hole
662 passed to caml_call_gc from OCaml code. These structures are extensible
663 and can thus accommodate any number of C backtraces leading from
664 caml_call_gc.
665 */
666 /* CR-soon mshinwell: it might in fact be the case now that nothing called
667 from caml_call_gc will do any allocation that ends up on the trie. We
668 can revisit this after the first release. */
669
find_trie_node_from_libunwind(int for_allocation,uintnat wosize,struct ext_table ** cached_frames)670 static NOINLINE void* find_trie_node_from_libunwind(int for_allocation,
671 uintnat wosize, struct ext_table** cached_frames)
672 {
673 #ifdef HAS_LIBUNWIND
674 /* Given that [caml_last_return_address] is the most recent call site in
675 OCaml code, and that we are now in C (or other) code called from that
676 site, obtain a backtrace using libunwind and graft the most recent
677 portion (everything back to but not including [caml_last_return_address])
678 onto the trie. See the important comment below regarding the fact that
679 call site, and not callee, addresses are recorded during this process.
680
681 If [for_allocation] is non-zero, the final node recorded will be for
682 an allocation, and the returned pointer is to the allocation node.
683 Otherwise, no node is recorded for the innermost frame, and the
684 returned pointer is a pointer to the *node hole* where a node for that
685 frame should be attached.
686
687 If [for_allocation] is non-zero then [wosize] must give the size in
688 words, excluding the header, of the value being allocated.
689
690 If [cached_frames != NULL] then:
691 1. If [*cached_frames] is NULL then save the captured backtrace in a
692 newly-allocated table and store the pointer to that table in
693 [*cached_frames];
694 2. Otherwise use [*cached_frames] as the unwinding information.
695 The intention is that when the context is known (e.g. a function such
696 as [caml_make_vect] known to have been directly invoked from OCaml),
697 we can avoid expensive calls to libunwind.
698 */
699
700 unw_cursor_t cur;
701 unw_context_t ctx;
702 int ret;
703 int innermost_frame;
704 int frame;
705 static struct ext_table frames_local;
706 struct ext_table* frames;
707 static int ext_table_initialised = 0;
708 int have_frames_already = 0;
709 value* node_hole;
710 c_node* node = NULL;
711 int initial_table_size = 1000;
712 int must_initialise_node_for_allocation = 0;
713
714 if (!cached_frames) {
715 if (!ext_table_initialised) {
716 caml_ext_table_init(&frames_local, initial_table_size);
717 ext_table_initialised = 1;
718 }
719 else {
720 caml_ext_table_clear(&frames_local, 0);
721 }
722 frames = &frames_local;
723 } else {
724 if (*cached_frames) {
725 frames = *cached_frames;
726 have_frames_already = 1;
727 }
728 else {
729 frames = (struct ext_table*) malloc(sizeof(struct ext_table));
730 if (!frames) {
731 caml_fatal_error("Not enough memory for ext_table allocation");
732 }
733 caml_ext_table_init(frames, initial_table_size);
734 *cached_frames = frames;
735 }
736 }
737
738 if (!have_frames_already) {
739 /* Get the stack backtrace as far as [caml_last_return_address]. */
740
741 ret = unw_getcontext(&ctx);
742 if (ret != UNW_ESUCCESS) {
743 return NULL;
744 }
745
746 ret = unw_init_local(&cur, &ctx);
747 if (ret != UNW_ESUCCESS) {
748 return NULL;
749 }
750
751 while ((ret = unw_step(&cur)) > 0) {
752 unw_word_t ip;
753 unw_get_reg(&cur, UNW_REG_IP, &ip);
754 if (caml_last_return_address == (uintnat) ip) {
755 break;
756 }
757 else {
758 /* Inlined some of [caml_ext_table_add] for speed. */
759 if (frames->size < frames->capacity) {
760 frames->contents[frames->size++] = (void*) ip;
761 } else {
762 caml_ext_table_add(frames, (void*) ip);
763 }
764 }
765 }
766 }
767
768 /* We always need to ignore the frames for:
769 #0 find_trie_node_from_libunwind
770 #1 caml_spacetime_c_to_ocaml
771 Further, if this is not an allocation point, we should not create the
772 node for the current C function that triggered us (i.e. frame #2). */
773 innermost_frame = for_allocation ? 1 : 2;
774
775 if (frames->size - 1 < innermost_frame) {
776 /* Insufficiently many frames (maybe no frames) returned from
777 libunwind; just don't do anything. */
778 return NULL;
779 }
780
781 node_hole = caml_spacetime_trie_node_ptr;
782 /* Note that if [node_hole] is filled, then it must point to a C node,
783 since it is not possible for there to be a call point in an OCaml
784 function that sometimes calls C and sometimes calls OCaml. */
785
786 for (frame = frames->size - 1; frame >= innermost_frame; frame--) {
787 c_node_type expected_type;
788 void* pc = frames->contents[frame];
789 Assert (pc != (void*) caml_last_return_address);
790
791 if (!for_allocation) {
792 expected_type = CALL;
793 }
794 else {
795 expected_type = (frame > innermost_frame ? CALL : ALLOCATION);
796 }
797
798 if (*node_hole == Val_unit) {
799 node = allocate_c_node();
800 /* Note: for CALL nodes, the PC is the program counter at each call
801 site. We do not store program counter addresses of the start of
802 callees, unlike for OCaml nodes. This means that some trie nodes
803 will become conflated. These can be split during post-processing by
804 working out which function each call site was in. */
805 node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
806 : Encode_c_node_pc_for_alloc_point(pc));
807 *node_hole = caml_spacetime_stored_pointer_of_c_node(node);
808 if (expected_type == ALLOCATION) {
809 must_initialise_node_for_allocation = 1;
810 }
811 }
812 else {
813 c_node* prev;
814 int found = 0;
815
816 node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
817 Assert(node != NULL);
818 Assert(node->next == Val_unit
819 || (((uintnat) (node->next)) % sizeof(value) == 0));
820
821 prev = NULL;
822
823 while (!found && node != NULL) {
824 if (caml_spacetime_classify_c_node(node) == expected_type
825 && pc_inside_c_node_matches(node, pc)) {
826 found = 1;
827 }
828 else {
829 prev = node;
830 node = caml_spacetime_c_node_of_stored_pointer(node->next);
831 }
832 }
833 if (!found) {
834 Assert(prev != NULL);
835 node = allocate_c_node();
836 node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
837 : Encode_c_node_pc_for_alloc_point(pc));
838 if (expected_type == ALLOCATION) {
839 must_initialise_node_for_allocation = 1;
840 }
841 prev->next = caml_spacetime_stored_pointer_of_c_node(node);
842 }
843 }
844
845 Assert(node != NULL);
846
847 Assert(caml_spacetime_classify_c_node(node) == expected_type);
848 Assert(pc_inside_c_node_matches(node, pc));
849 node_hole = &node->data.callee_node;
850 }
851
852 if (must_initialise_node_for_allocation) {
853 caml_spacetime_profinfo++;
854 if (caml_spacetime_profinfo > PROFINFO_MASK) {
855 /* Profiling counter overflow. */
856 caml_spacetime_profinfo = PROFINFO_MASK;
857 }
858 node->data.allocation.profinfo =
859 Make_header_with_profinfo(
860 /* "-1" because [c_node] has the GC header as its first
861 element. */
862 offsetof(c_node, data.allocation.count)/sizeof(value) - 1,
863 Infix_tag,
864 Caml_black,
865 caml_spacetime_profinfo);
866 node->data.allocation.count = Val_long(0);
867
868 /* Add the new allocation point into the linked list of all allocation
869 points. */
870 if (caml_all_allocation_points != NULL) {
871 node->data.allocation.next =
872 (value) &caml_all_allocation_points->count;
873 } else {
874 node->data.allocation.next = Val_unit;
875 }
876 caml_all_allocation_points = &node->data.allocation;
877 }
878
879 if (for_allocation) {
880 Assert(caml_spacetime_classify_c_node(node) == ALLOCATION);
881 Assert(caml_spacetime_c_node_of_stored_pointer(node->next) != node);
882 Assert(Profinfo_hd(node->data.allocation.profinfo) > 0);
883 node->data.allocation.count =
884 Val_long(Long_val(node->data.allocation.count) + (1 + wosize));
885 }
886
887 Assert(node->next != (value) NULL);
888
889 return for_allocation ? (void*) node : (void*) node_hole;
890 #else
891 return NULL;
892 #endif
893 }
894
caml_spacetime_c_to_ocaml(void * ocaml_entry_point,void * identifying_pc_for_caml_start_program)895 void caml_spacetime_c_to_ocaml(void* ocaml_entry_point,
896 void* identifying_pc_for_caml_start_program)
897 {
898 /* Called in [caml_start_program] and [caml_callback*] when we are about
899 to cross from C into OCaml. [ocaml_entry_point] is the branch target.
900 This situation is handled by ensuring the presence of a new OCaml node
901 for the callback veneer; the node contains a single indirect call point
902 which accumulates the [ocaml_entry_point]s.
903
904 The layout of the node is described in the "system shape table"; see
905 asmrun/amd64.S.
906 */
907
908 value node;
909
910 /* Update the trie with the current backtrace, as far back as
911 [caml_last_return_address], and leave the node hole pointer at
912 the correct place for attachment of a [caml_start_program] node. */
913
914 #ifdef HAS_LIBUNWIND
915 value* node_temp;
916 node_temp = (value*) find_trie_node_from_libunwind(0, 0, NULL);
917 if (node_temp != NULL) {
918 caml_spacetime_trie_node_ptr = node_temp;
919 }
920 #endif
921
922 if (*caml_spacetime_trie_node_ptr == Val_unit) {
923 uintnat size_including_header;
924
925 size_including_header =
926 1 /* GC header */ + Node_num_header_words + Indirect_num_fields;
927
928 node = allocate_uninitialized_ocaml_node(size_including_header);
929 Hd_val(node) =
930 Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
931 Assert((((uintnat) identifying_pc_for_caml_start_program) % 1) == 0);
932 Node_pc(node) = Encode_node_pc(identifying_pc_for_caml_start_program);
933 Tail_link(node) = node;
934 Indirect_pc_linked_list(node, Node_num_header_words) = Val_unit;
935 *caml_spacetime_trie_node_ptr = node;
936 }
937 else {
938 node = *caml_spacetime_trie_node_ptr;
939 /* If there is a node here already, it should never be an initialized
940 (but as yet unused) tail call point, since calls from OCaml into C
941 are never tail calls (and no C -> C call is marked as tail). */
942 Assert(!Is_tail_caller_node_encoded(node));
943 }
944
945 Assert(Is_ocaml_node(node));
946 Assert(Decode_node_pc(Node_pc(node))
947 == identifying_pc_for_caml_start_program);
948 Assert(Tail_link(node) == node);
949 Assert(Wosize_val(node) == Node_num_header_words + Indirect_num_fields);
950
951 /* Search the node to find the node hole corresponding to the indirect
952 call to the OCaml function. */
953 caml_spacetime_trie_node_ptr =
954 caml_spacetime_indirect_node_hole_ptr(
955 ocaml_entry_point,
956 &Indirect_pc_linked_list(node, Node_num_header_words),
957 Val_unit);
958 Assert(*caml_spacetime_trie_node_ptr == Val_unit
959 || Is_ocaml_node(*caml_spacetime_trie_node_ptr));
960 }
961
962 extern void caml_garbage_collection(void); /* signals_asm.c */
963 extern void caml_array_bound_error(void); /* fail.c */
964
caml_spacetime_generate_profinfo(void * profinfo_words,uintnat index_within_node)965 CAMLprim uintnat caml_spacetime_generate_profinfo (void* profinfo_words,
966 uintnat index_within_node)
967 {
968 /* Called from code that creates a value's header inside an OCaml
969 function. */
970
971 value node;
972 uintnat profinfo;
973
974 caml_spacetime_profinfo++;
975 if (caml_spacetime_profinfo > PROFINFO_MASK) {
976 /* Profiling counter overflow. */
977 caml_spacetime_profinfo = PROFINFO_MASK;
978 }
979 profinfo = caml_spacetime_profinfo;
980
981 /* CR-someday mshinwell: we could always use the [struct allocation_point]
982 overlay instead of the macros now. */
983
984 /* [node] isn't really a node; it points into the middle of
985 one---specifically to the "profinfo" word of an allocation point.
986 It's done like this to avoid re-calculating the place in the node
987 (which already has to be done in the OCaml-generated code run before
988 this function). */
989 node = (value) profinfo_words;
990 Assert(Alloc_point_profinfo(node, 0) == Val_unit);
991
992 /* The profinfo value is stored shifted to reduce the number of
993 instructions required on the OCaml side. It also enables us to use
994 [Infix_tag] to obtain valid value pointers into the middle of nodes,
995 which is used for the linked list of all allocation points. */
996 profinfo = Make_header_with_profinfo(
997 index_within_node, Infix_tag, Caml_black, profinfo);
998
999 Assert(!Is_block(profinfo));
1000 Alloc_point_profinfo(node, 0) = profinfo;
1001 /* The count is set to zero by the initialisation when the node was
1002 created (see above). */
1003 Assert(Alloc_point_count(node, 0) == Val_long(0));
1004
1005 /* Add the new allocation point into the linked list of all allocation
1006 points. */
1007 if (caml_all_allocation_points != NULL) {
1008 Alloc_point_next_ptr(node, 0) = (value) &caml_all_allocation_points->count;
1009 }
1010 else {
1011 Assert(Alloc_point_next_ptr(node, 0) == Val_unit);
1012 }
1013 caml_all_allocation_points = (allocation_point*) node;
1014
1015 return profinfo;
1016 }
1017
caml_spacetime_my_profinfo(struct ext_table ** cached_frames,uintnat wosize)1018 uintnat caml_spacetime_my_profinfo (struct ext_table** cached_frames,
1019 uintnat wosize)
1020 {
1021 /* Return the profinfo value that should be written into a value's header
1022 during an allocation from C. This may necessitate extending the trie
1023 with information obtained from libunwind. */
1024
1025 c_node* node;
1026 uintnat profinfo = 0;
1027
1028 node = find_trie_node_from_libunwind(1, wosize, cached_frames);
1029 if (node != NULL) {
1030 profinfo = ((uintnat) (node->data.allocation.profinfo)) >> PROFINFO_SHIFT;
1031 }
1032
1033 return profinfo; /* N.B. not shifted by PROFINFO_SHIFT */
1034 }
1035
caml_spacetime_automatic_snapshot(void)1036 void caml_spacetime_automatic_snapshot (void)
1037 {
1038 if (automatic_snapshots) {
1039 double start_time, end_time;
1040 start_time = caml_sys_time_unboxed(Val_unit);
1041 if (start_time >= next_snapshot_time) {
1042 maybe_reopen_snapshot_channel();
1043 caml_spacetime_save_snapshot(snapshot_channel, 0.0, 0);
1044 end_time = caml_sys_time_unboxed(Val_unit);
1045 next_snapshot_time = end_time + snapshot_interval;
1046 }
1047 }
1048 }
1049
caml_spacetime_save_event_for_automatic_snapshots(value v_event_name)1050 CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
1051 (value v_event_name)
1052 {
1053 if (automatic_snapshots) {
1054 maybe_reopen_snapshot_channel();
1055 caml_spacetime_save_event_internal (Val_unit, snapshot_channel,
1056 v_event_name);
1057 }
1058 return Val_unit;
1059 }
1060
caml_spacetime_automatic_save(void)1061 void caml_spacetime_automatic_save (void)
1062 {
1063 /* Called from [atexit]. */
1064
1065 if (automatic_snapshots) {
1066 automatic_snapshots = 0;
1067 maybe_reopen_snapshot_channel();
1068 save_trie(snapshot_channel, 0.0, 0);
1069 caml_flush(snapshot_channel);
1070 caml_close_channel(snapshot_channel);
1071 }
1072 }
1073
caml_spacetime_enabled(value v_unit)1074 CAMLprim value caml_spacetime_enabled (value v_unit)
1075 {
1076 return Val_true;
1077 }
1078
caml_register_channel_for_spacetime(value v_channel)1079 CAMLprim value caml_register_channel_for_spacetime (value v_channel)
1080 {
1081 struct channel* channel = Channel(v_channel);
1082 channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
1083 return Val_unit;
1084 }
1085
1086 #else
1087
1088 /* Functions for when the compiler was not configured with "-spacetime". */
1089
caml_spacetime_write_magic_number(value v_channel)1090 CAMLprim value caml_spacetime_write_magic_number(value v_channel)
1091 {
1092 return Val_unit;
1093 }
1094
caml_spacetime_enabled(value v_unit)1095 CAMLprim value caml_spacetime_enabled (value v_unit)
1096 {
1097 return Val_false;
1098 }
1099
caml_spacetime_save_event(value v_time_opt,value v_channel,value v_event_name)1100 CAMLprim value caml_spacetime_save_event (value v_time_opt,
1101 value v_channel,
1102 value v_event_name)
1103 {
1104 return Val_unit;
1105 }
1106
caml_spacetime_save_event_for_automatic_snapshots(value v_event_name)1107 CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
1108 (value v_event_name)
1109 {
1110 return Val_unit;
1111 }
1112
caml_spacetime_save_trie(value ignored)1113 CAMLprim value caml_spacetime_save_trie (value ignored)
1114 {
1115 return Val_unit;
1116 }
1117
caml_register_channel_for_spacetime(value v_channel)1118 CAMLprim value caml_register_channel_for_spacetime (value v_channel)
1119 {
1120 return Val_unit;
1121 }
1122
1123 #endif
1124