1 /**************************************************************************/
2 /*                                                                        */
3 /*                                 OCaml                                  */
4 /*                                                                        */
5 /*            Mark Shinwell and Leo White, Jane Street Europe             */
6 /*                                                                        */
7 /*   Copyright 2013--2016, Jane Street Group, LLC                         */
8 /*                                                                        */
9 /*   All rights reserved.  This file is distributed under the terms of    */
10 /*   the GNU Lesser General Public License version 2.1, with the          */
11 /*   special exception on linking described in the file LICENSE.          */
12 /*                                                                        */
13 /**************************************************************************/
14 
15 #define CAML_INTERNALS
16 
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <math.h>
22 
23 #include "caml/alloc.h"
24 #include "caml/backtrace_prim.h"
25 #include "caml/config.h"
26 #include "caml/custom.h"
27 #include "caml/fail.h"
28 #include "caml/gc.h"
29 #include "caml/gc_ctrl.h"
30 #include "caml/intext.h"
31 #include "caml/major_gc.h"
32 #include "caml/memory.h"
33 #include "caml/minor_gc.h"
34 #include "caml/misc.h"
35 #include "caml/mlvalues.h"
36 #include "caml/roots.h"
37 #include "caml/signals.h"
38 #include "caml/stack.h"
39 #include "caml/sys.h"
40 #include "caml/spacetime.h"
41 
42 #ifdef WITH_SPACETIME
43 
44 /* The following structures must match the type definitions in the
45    [Spacetime] module. */
46 
47 typedef struct {
48   /* (GC header here.) */
49   value minor_words;
50   value promoted_words;
51   value major_words;
52   value minor_collections;
53   value major_collections;
54   value heap_words;
55   value heap_chunks;
56   value compactions;
57   value top_heap_words;
58 } gc_stats;
59 
60 typedef struct {
61   value profinfo;
62   value num_blocks;
63   value num_words_including_headers;
64 } snapshot_entry;
65 
66 typedef struct {
67   /* (GC header here.) */
68   snapshot_entry entries[0];
69 } snapshot_entries;
70 
71 typedef struct {
72   /* (GC header here.) */
73   value time;
74   value gc_stats;
75   value entries;
76   value words_scanned;
77   value words_scanned_with_profinfo;
78   value total_allocations;
79 } snapshot;
80 
81 typedef struct {
82   uintnat num_blocks;
83   uintnat num_words_including_headers;
84 } raw_snapshot_entry;
85 
allocate_outside_heap_with_tag(mlsize_t size_in_bytes,tag_t tag)86 static value allocate_outside_heap_with_tag(mlsize_t size_in_bytes, tag_t tag)
87 {
88   /* CR-soon mshinwell: this function should live somewhere else */
89   header_t* block;
90 
91   Assert(size_in_bytes % sizeof(value) == 0);
92   block = caml_stat_alloc(sizeof(header_t) + size_in_bytes);
93   *block = Make_header(size_in_bytes / sizeof(value), tag, Caml_black);
94   return (value) &block[1];
95 }
96 
allocate_outside_heap(mlsize_t size_in_bytes)97 static value allocate_outside_heap(mlsize_t size_in_bytes)
98 {
99   Assert(size_in_bytes > 0);
100   return allocate_outside_heap_with_tag(size_in_bytes, 0);
101 }
102 
take_gc_stats(void)103 static value take_gc_stats(void)
104 {
105   value v_stats;
106   gc_stats* stats;
107 
108   v_stats = allocate_outside_heap(sizeof(gc_stats));
109   stats = (gc_stats*) v_stats;
110 
111   stats->minor_words = Val_long(caml_stat_minor_words);
112   stats->promoted_words = Val_long(caml_stat_promoted_words);
113   stats->major_words =
114     Val_long(((uintnat) caml_stat_major_words)
115              + ((uintnat) caml_allocated_words));
116   stats->minor_collections = Val_long(caml_stat_minor_collections);
117   stats->major_collections = Val_long(caml_stat_major_collections);
118   stats->heap_words = Val_long(caml_stat_heap_wsz / sizeof(value));
119   stats->heap_chunks = Val_long(caml_stat_heap_chunks);
120   stats->compactions = Val_long(caml_stat_compactions);
121   stats->top_heap_words = Val_long(caml_stat_top_heap_wsz / sizeof(value));
122 
123   return v_stats;
124 }
125 
get_total_allocations(void)126 static value get_total_allocations(void)
127 {
128   value v_total_allocations = Val_unit;
129   allocation_point* total = caml_all_allocation_points;
130 
131   while (total != NULL) {
132     value v_total;
133     v_total = allocate_outside_heap_with_tag(3 * sizeof(value), 0);
134 
135     /* [v_total] is of type [Raw_spacetime_lib.total_allocations]. */
136     Field(v_total, 0) = Val_long(Profinfo_hd(total->profinfo));
137     Field(v_total, 1) = total->count;
138     Field(v_total, 2) = v_total_allocations;
139     v_total_allocations = v_total;
140 
141     Assert (total->next == Val_unit
142       || (Is_block(total->next) && Tag_val(total->next) == Infix_tag));
143     if (total->next == Val_unit) {
144       total = NULL;
145     }
146     else {
147       total = (allocation_point*) Hp_val(total->next);
148     }
149   }
150 
151   return v_total_allocations;
152 }
153 
take_snapshot(double time_override,int use_time_override)154 static value take_snapshot(double time_override, int use_time_override)
155 {
156   value v_snapshot;
157   snapshot* heap_snapshot;
158   value v_entries;
159   snapshot_entries* entries;
160   char* chunk;
161   value gc_stats;
162   uintnat index;
163   uintnat target_index;
164   value v_time;
165   double time;
166   uintnat profinfo;
167   uintnat num_distinct_profinfos;
168   /* Fixed size buffer to avoid needing a hash table: */
169   static raw_snapshot_entry* raw_entries = NULL;
170   uintnat words_scanned = 0;
171   uintnat words_scanned_with_profinfo = 0;
172   value v_total_allocations;
173 
174   if (!use_time_override) {
175     time = caml_sys_time_unboxed(Val_unit);
176   }
177   else {
178     time = time_override;
179   }
180 
181   gc_stats = take_gc_stats();
182 
183   if (raw_entries == NULL) {
184     size_t size = (PROFINFO_MASK + 1) * sizeof(raw_snapshot_entry);
185     raw_entries = caml_stat_alloc(size);
186     memset(raw_entries, '\0', size);
187   } else {
188     size_t size = (PROFINFO_MASK + 1) * sizeof(raw_snapshot_entry);
189     memset(raw_entries, '\0', size);
190   }
191 
192   num_distinct_profinfos = 0;
193 
194   /* CR-someday mshinwell: consider reintroducing minor heap scanning,
195      properly from roots, which would then give a snapshot function
196      that doesn't do a minor GC.  Although this may not be that important
197      and potentially not worth the effort (it's quite tricky). */
198 
199   /* Scan the major heap. */
200   chunk = caml_heap_start;
201   while (chunk != NULL) {
202     char* hp;
203     char* limit;
204 
205     hp = chunk;
206     limit = chunk + Chunk_size (chunk);
207 
208     while (hp < limit) {
209       header_t hd = Hd_hp (hp);
210       switch (Color_hd(hd)) {
211         case Caml_blue:
212           break;
213 
214         default:
215           if (Wosize_hd(hd) > 0) { /* ignore atoms */
216             profinfo = Profinfo_hd(hd);
217             words_scanned += Whsize_hd(hd);
218             if (profinfo > 0 && profinfo < PROFINFO_MASK) {
219               words_scanned_with_profinfo += Whsize_hd(hd);
220               Assert (raw_entries[profinfo].num_blocks >= 0);
221               if (raw_entries[profinfo].num_blocks == 0) {
222                 num_distinct_profinfos++;
223               }
224               raw_entries[profinfo].num_blocks++;
225               raw_entries[profinfo].num_words_including_headers +=
226                 Whsize_hd(hd);
227             }
228           }
229           break;
230       }
231       hp += Bhsize_hd (hd);
232       Assert (hp <= limit);
233     }
234 
235     chunk = Chunk_next (chunk);
236   }
237 
238   if (num_distinct_profinfos > 0) {
239     v_entries = allocate_outside_heap(
240       num_distinct_profinfos*sizeof(snapshot_entry));
241     entries = (snapshot_entries*) v_entries;
242     target_index = 0;
243     for (index = 0; index <= PROFINFO_MASK; index++) {
244       Assert(raw_entries[index].num_blocks >= 0);
245       if (raw_entries[index].num_blocks > 0) {
246         Assert(target_index < num_distinct_profinfos);
247         entries->entries[target_index].profinfo = Val_long(index);
248         entries->entries[target_index].num_blocks
249           = Val_long(raw_entries[index].num_blocks);
250         entries->entries[target_index].num_words_including_headers
251           = Val_long(raw_entries[index].num_words_including_headers);
252         target_index++;
253       }
254     }
255   } else {
256     v_entries = Atom(0);
257   }
258 
259   Assert(sizeof(double) == sizeof(value));
260   v_time = allocate_outside_heap_with_tag(sizeof(double), Double_tag);
261   Double_field(v_time, 0) = time;
262 
263   v_snapshot = allocate_outside_heap(sizeof(snapshot));
264   heap_snapshot = (snapshot*) v_snapshot;
265 
266   v_total_allocations = get_total_allocations();
267 
268   heap_snapshot->time = v_time;
269   heap_snapshot->gc_stats = gc_stats;
270   heap_snapshot->entries = v_entries;
271   heap_snapshot->words_scanned
272     = Val_long(words_scanned);
273   heap_snapshot->words_scanned_with_profinfo
274     = Val_long(words_scanned_with_profinfo);
275   heap_snapshot->total_allocations = v_total_allocations;
276 
277   return v_snapshot;
278 }
279 
caml_spacetime_save_snapshot(struct channel * chan,double time_override,int use_time_override)280 void caml_spacetime_save_snapshot (struct channel *chan, double time_override,
281                                    int use_time_override)
282 {
283   value v_snapshot;
284   value v_total_allocations;
285   snapshot* heap_snapshot;
286 
287   Lock(chan);
288 
289   v_snapshot = take_snapshot(time_override, use_time_override);
290 
291   caml_output_val(chan, Val_long(0), Val_long(0));
292 
293   caml_extern_allow_out_of_heap = 1;
294   caml_output_val(chan, v_snapshot, Val_long(0));
295   caml_extern_allow_out_of_heap = 0;
296 
297   Unlock(chan);
298 
299   heap_snapshot = (snapshot*) v_snapshot;
300   caml_stat_free(Hp_val(heap_snapshot->time));
301   caml_stat_free(Hp_val(heap_snapshot->gc_stats));
302   if (Wosize_val(heap_snapshot->entries) > 0) {
303     caml_stat_free(Hp_val(heap_snapshot->entries));
304   }
305   v_total_allocations = heap_snapshot->total_allocations;
306   while (v_total_allocations != Val_unit) {
307     value next = Field(v_total_allocations, 2);
308     caml_stat_free(Hp_val(v_total_allocations));
309     v_total_allocations = next;
310   }
311 
312   caml_stat_free(Hp_val(v_snapshot));
313 }
314 
caml_spacetime_take_snapshot(value v_time_opt,value v_channel)315 CAMLprim value caml_spacetime_take_snapshot(value v_time_opt, value v_channel)
316 {
317   struct channel * channel = Channel(v_channel);
318   double time_override = 0.0;
319   int use_time_override = 0;
320 
321   if (Is_block(v_time_opt)) {
322     time_override = Double_field(Field(v_time_opt, 0), 0);
323     use_time_override = 1;
324   }
325 
326   caml_spacetime_save_snapshot(channel, time_override, use_time_override);
327 
328   return Val_unit;
329 }
330 
331 extern struct custom_operations caml_int64_ops;  /* ints.c */
332 
333 static value
allocate_int64_outside_heap(uint64_t i)334 allocate_int64_outside_heap(uint64_t i)
335 {
336   value v;
337 
338   v = allocate_outside_heap_with_tag(2 * sizeof(value), Custom_tag);
339   Custom_ops_val(v) = &caml_int64_ops;
340   Int64_val(v) = i;
341 
342   return v;
343 }
344 
345 static value
copy_string_outside_heap(char const * s)346 copy_string_outside_heap(char const *s)
347 {
348   int len;
349   mlsize_t wosize, offset_index;
350   value result;
351 
352   len = strlen(s);
353   wosize = (len + sizeof (value)) / sizeof (value);
354   result = allocate_outside_heap_with_tag(wosize * sizeof(value), String_tag);
355 
356   Field (result, wosize - 1) = 0;
357   offset_index = Bsize_wsize (wosize) - 1;
358   Byte (result, offset_index) = offset_index - len;
359   memmove(String_val(result), s, len);
360 
361   return result;
362 }
363 
364 static value
allocate_loc_outside_heap(struct caml_loc_info li)365 allocate_loc_outside_heap(struct caml_loc_info li)
366 {
367   value result;
368 
369   if (li.loc_valid) {
370     result = allocate_outside_heap_with_tag(5 * sizeof(value), 0);
371     Field(result, 0) = Val_bool(li.loc_is_raise);
372     Field(result, 1) = copy_string_outside_heap(li.loc_filename);
373     Field(result, 2) = Val_int(li.loc_lnum);
374     Field(result, 3) = Val_int(li.loc_startchr);
375     Field(result, 4) = Val_int(li.loc_endchr);
376   } else {
377     result = allocate_outside_heap_with_tag(sizeof(value), 1);
378     Field(result, 0) = Val_bool(li.loc_is_raise);
379   }
380 
381   return result;
382 }
383 
caml_spacetime_timestamp(double time_override,int use_time_override)384 value caml_spacetime_timestamp(double time_override, int use_time_override)
385 {
386   double time;
387   value v_time;
388 
389   if (!use_time_override) {
390     time = caml_sys_time_unboxed(Val_unit);
391   }
392   else {
393     time = time_override;
394   }
395 
396   v_time = allocate_outside_heap_with_tag(sizeof(double), Double_tag);
397   Double_field(v_time, 0) = time;
398 
399   return v_time;
400 }
401 
caml_spacetime_frame_table(void)402 value caml_spacetime_frame_table(void)
403 {
404   /* Flatten the frame table into a single associative list. */
405 
406   value list = Val_long(0);  /* the empty list */
407   uintnat i;
408 
409   if (!caml_debug_info_available()) {
410     return list;
411   }
412 
413   if (caml_frame_descriptors == NULL) {
414     caml_init_frame_descriptors();
415   }
416 
417   for (i = 0; i <= caml_frame_descriptors_mask; i++) {
418     frame_descr* descr = caml_frame_descriptors[i];
419     if (descr != NULL) {
420       value location, return_address, pair, new_list_element, location_list;
421       struct caml_loc_info li;
422       debuginfo dbg;
423       if (descr->frame_size != 0xffff) {
424         dbg = caml_debuginfo_extract(descr);
425         if (dbg != NULL) {
426           location_list = Val_unit;
427           while (dbg != NULL) {
428             value list_element;
429 
430             caml_debuginfo_location(dbg, &li);
431             location = allocate_loc_outside_heap(li);
432 
433             list_element =
434               allocate_outside_heap_with_tag(2 * sizeof(value), 0 /* (::) */);
435             Field(list_element, 0) = location;
436             Field(list_element, 1) = location_list;
437             location_list = list_element;
438 
439             dbg = caml_debuginfo_next(dbg);
440           }
441 
442           return_address = allocate_int64_outside_heap(descr->retaddr);
443           pair = allocate_outside_heap_with_tag(2 * sizeof(value), 0);
444           Field(pair, 0) = return_address;
445           Field(pair, 1) = location_list;
446 
447           new_list_element =
448             allocate_outside_heap_with_tag(2 * sizeof(value), 0 /* (::) */);
449           Field(new_list_element, 0) = pair;
450           Field(new_list_element, 1) = list;
451           list = new_list_element;
452         }
453       }
454     }
455   }
456 
457   return list;
458 }
459 
add_unit_to_shape_table(uint64_t * unit_table,value * list)460 static void add_unit_to_shape_table(uint64_t *unit_table, value *list)
461 {
462   /* This function reverses the order of the lists giving the layout of each
463      node; however, spacetime_profiling.ml ensures they are emitted in
464      reverse order, so at the end of it all they're not reversed. */
465 
466   uint64_t* ptr = unit_table;
467 
468   while (*ptr != (uint64_t) 0) {
469     value new_list_element, pair, function_address, layout;
470 
471     function_address =
472       allocate_int64_outside_heap(*ptr++);
473 
474     layout = Val_long(0);  /* the empty list */
475     while (*ptr != (uint64_t) 0) {
476       int tag;
477       int stored_tag;
478       value part_of_shape;
479       value new_part_list_element;
480       value location;
481       int has_extra_argument = 0;
482 
483       stored_tag = *ptr++;
484       /* CR-soon mshinwell: share with emit.mlp */
485       switch (stored_tag) {
486         case 1:  /* direct call to given location */
487           tag = 0;
488           has_extra_argument = 1;  /* the address of the callee */
489           break;
490 
491         case 2:  /* indirect call to given location */
492           tag = 1;
493           break;
494 
495         case 3:  /* allocation at given location */
496           tag = 2;
497           break;
498 
499         default:
500           Assert(0);
501           abort();  /* silence compiler warning */
502       }
503 
504       location = allocate_int64_outside_heap(*ptr++);
505 
506       part_of_shape = allocate_outside_heap_with_tag(
507         sizeof(value) * (has_extra_argument ? 2 : 1), tag);
508       Field(part_of_shape, 0) = location;
509       if (has_extra_argument) {
510         Field(part_of_shape, 1) =
511           allocate_int64_outside_heap(*ptr++);
512       }
513 
514       new_part_list_element =
515         allocate_outside_heap_with_tag(2 * sizeof(value), 0 /* (::) */);
516       Field(new_part_list_element, 0) = part_of_shape;
517       Field(new_part_list_element, 1) = layout;
518       layout = new_part_list_element;
519     }
520 
521     pair = allocate_outside_heap_with_tag(2 * sizeof(value), 0);
522     Field(pair, 0) = function_address;
523     Field(pair, 1) = layout;
524 
525     new_list_element =
526       allocate_outside_heap_with_tag(2 * sizeof(value), 0 /* (::) */);
527     Field(new_list_element, 0) = pair;
528     Field(new_list_element, 1) = *list;
529     *list = new_list_element;
530 
531     ptr++;
532   }
533 }
534 
caml_spacetime_shape_table(void)535 value caml_spacetime_shape_table(void)
536 {
537   value list;
538   uint64_t* unit_table;
539   shape_table *dynamic_table;
540   uint64_t** static_table;
541 
542   /* Flatten the hierarchy of shape tables into a single associative list
543      mapping from function symbols to node layouts.  The node layouts are
544      themselves lists. */
545 
546   list = Val_long(0);  /* the empty list */
547 
548   /* Add static shape tables */
549   static_table = caml_spacetime_static_shape_tables;
550   while (*static_table != (uint64_t) 0) {
551     unit_table = *static_table++;
552     add_unit_to_shape_table(unit_table, &list);
553   }
554 
555   /* Add dynamic shape tables */
556   dynamic_table = caml_spacetime_dynamic_shape_tables;
557 
558   while (dynamic_table != NULL) {
559     unit_table = dynamic_table->table;
560     add_unit_to_shape_table(unit_table, &list);
561     dynamic_table = dynamic_table->next;
562   }
563 
564   return list;
565 }
566 
567 #else
568 
spacetime_disabled()569 static value spacetime_disabled()
570 {
571   caml_failwith("Spacetime profiling not enabled");
572   Assert(0);  /* unreachable */
573 }
574 
caml_spacetime_take_snapshot(value ignored)575 CAMLprim value caml_spacetime_take_snapshot(value ignored)
576 {
577   return Val_unit;
578 }
579 
caml_spacetime_marshal_frame_table()580 CAMLprim value caml_spacetime_marshal_frame_table ()
581 {
582   return spacetime_disabled();
583 }
584 
caml_spacetime_frame_table()585 CAMLprim value caml_spacetime_frame_table ()
586 {
587   return spacetime_disabled();
588 }
589 
caml_spacetime_marshal_shape_table()590 CAMLprim value caml_spacetime_marshal_shape_table ()
591 {
592   return spacetime_disabled();
593 }
594 
caml_spacetime_shape_table()595 CAMLprim value caml_spacetime_shape_table ()
596 {
597   return spacetime_disabled();
598 }
599 
600 #endif
601