1 /**********************************************************************
2
3 iseq.c -
4
5 $Author: k0kubun $
6 created at: 2006-07-11(Tue) 09:00:03 +0900
7
8 Copyright (C) 2006 Koichi Sasada
9
10 **********************************************************************/
11
12 #include "internal.h"
13 #include "ruby/util.h"
14 #include "eval_intern.h"
15
16 #ifdef HAVE_DLADDR
17 # include <dlfcn.h>
18 #endif
19
20 #define RUBY_VM_INSNS_INFO 1
21 /* #define RUBY_MARK_FREE_DEBUG 1 */
22 #include "gc.h"
23 #include "vm_core.h"
24 #include "iseq.h"
25 #include "id_table.h"
26
27 #include "insns.inc"
28 #include "insns_info.inc"
29 #include "mjit.h"
30
31 VALUE rb_cISeq;
32 static VALUE iseqw_new(const rb_iseq_t *iseq);
33 static const rb_iseq_t *iseqw_check(VALUE iseqw);
34
35 #if VM_INSN_INFO_TABLE_IMPL == 2
36 static struct succ_index_table *succ_index_table_create(int max_pos, int *data, int size);
37 static unsigned int *succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size);
38 static int succ_index_lookup(const struct succ_index_table *sd, int x);
39 #endif
40
41 #define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && !RBASIC(obj)->klass)
42
43 static inline VALUE
obj_resurrect(VALUE obj)44 obj_resurrect(VALUE obj)
45 {
46 if (hidden_obj_p(obj)) {
47 switch (BUILTIN_TYPE(obj)) {
48 case T_STRING:
49 obj = rb_str_resurrect(obj);
50 break;
51 case T_ARRAY:
52 obj = rb_ary_resurrect(obj);
53 break;
54 case T_HASH:
55 obj = rb_hash_resurrect(obj);
56 break;
57 }
58 }
59 return obj;
60 }
61
62 static void
compile_data_free(struct iseq_compile_data * compile_data)63 compile_data_free(struct iseq_compile_data *compile_data)
64 {
65 if (compile_data) {
66 struct iseq_compile_data_storage *cur, *next;
67 cur = compile_data->storage_head;
68 while (cur) {
69 next = cur->next;
70 ruby_xfree(cur);
71 cur = next;
72 }
73 if (compile_data->ivar_cache_table) {
74 rb_id_table_free(compile_data->ivar_cache_table);
75 }
76 ruby_xfree(compile_data);
77 }
78 }
79
80 void
rb_iseq_free(const rb_iseq_t * iseq)81 rb_iseq_free(const rb_iseq_t *iseq)
82 {
83 RUBY_FREE_ENTER("iseq");
84
85 if (iseq && iseq->body) {
86 struct rb_iseq_constant_body *const body = iseq->body;
87 mjit_free_iseq(iseq); /* Notify MJIT */
88 ruby_xfree((void *)body->iseq_encoded);
89 ruby_xfree((void *)body->insns_info.body);
90 if (body->insns_info.positions) ruby_xfree((void *)body->insns_info.positions);
91 #if VM_INSN_INFO_TABLE_IMPL == 2
92 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
93 #endif
94 ruby_xfree((void *)body->local_table);
95 ruby_xfree((void *)body->is_entries);
96
97 if (body->ci_entries) {
98 unsigned int i;
99 struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&body->ci_entries[body->ci_size];
100 for (i=0; i<body->ci_kw_size; i++) {
101 const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg;
102 ruby_xfree((void *)kw_arg);
103 }
104 ruby_xfree(body->ci_entries);
105 ruby_xfree(body->cc_entries);
106 }
107 ruby_xfree((void *)body->catch_table);
108 ruby_xfree((void *)body->param.opt_table);
109
110 if (body->param.keyword != NULL) {
111 ruby_xfree((void *)body->param.keyword->default_values);
112 ruby_xfree((void *)body->param.keyword);
113 }
114 compile_data_free(ISEQ_COMPILE_DATA(iseq));
115 ruby_xfree(body);
116 }
117
118 if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) {
119 rb_hook_list_free(iseq->aux.exec.local_hooks);
120 }
121
122 RUBY_FREE_LEAVE("iseq");
123 }
124
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
/*
 * Adapter giving rb_vm_insn_addr2insn() the rb_vm_insns_translator_t
 * signature: its result is cast to VALUE.
 */
static VALUE
rb_vm_insn_addr2insn2(const void *addr)
{
    const VALUE insn = (VALUE)rb_vm_insn_addr2insn(addr);
    return insn;
}
#endif
132
133 static VALUE
rb_vm_insn_null_translator(const void * addr)134 rb_vm_insn_null_translator(const void *addr)
135 {
136 return (VALUE)addr;
137 }
138
139 typedef void iseq_value_itr_t(void *ctx, VALUE obj);
140 typedef VALUE rb_vm_insns_translator_t(const void *addr);
141
142 static int
iseq_extract_values(const VALUE * code,size_t pos,iseq_value_itr_t * func,void * data,rb_vm_insns_translator_t * translator)143 iseq_extract_values(const VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator)
144 {
145 VALUE insn = translator((void *)code[pos]);
146 int len = insn_len(insn);
147 int op_no;
148 const char *types = insn_op_types(insn);
149
150 for (op_no = 0; types[op_no]; op_no++) {
151 char type = types[op_no];
152 switch (type) {
153 case TS_CDHASH:
154 case TS_ISEQ:
155 case TS_VALUE:
156 {
157 VALUE op = code[pos + op_no + 1];
158 if (!SPECIAL_CONST_P(op)) {
159 func(data, op);
160 }
161 break;
162 }
163 case TS_ISE:
164 {
165 union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)code[pos + op_no + 1];
166 if (is->once.value) {
167 func(data, is->once.value);
168 }
169 break;
170 }
171 default:
172 break;
173 }
174 }
175
176 return len;
177 }
178
179 static void
rb_iseq_each_value(const rb_iseq_t * iseq,iseq_value_itr_t * func,void * data)180 rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data)
181 {
182 unsigned int size;
183 const VALUE *code;
184 size_t n;
185 rb_vm_insns_translator_t * translator;
186 const struct rb_iseq_constant_body *const body = iseq->body;
187
188 size = body->iseq_size;
189 code = body->iseq_encoded;
190
191 #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
192 if (FL_TEST(iseq, ISEQ_TRANSLATED)) {
193 translator = rb_vm_insn_addr2insn2;
194 } else {
195 translator = rb_vm_insn_null_translator;
196 }
197 #else
198 translator = rb_vm_insn_null_translator;
199 #endif
200
201 for (n = 0; n < size;) {
202 n += iseq_extract_values(code, n, func, data, translator);
203 }
204 }
205
206 static void
each_insn_value(void * ctx,VALUE obj)207 each_insn_value(void *ctx, VALUE obj)
208 {
209 rb_gc_mark(obj);
210 }
211
212 void
rb_iseq_mark(const rb_iseq_t * iseq)213 rb_iseq_mark(const rb_iseq_t *iseq)
214 {
215 RUBY_MARK_ENTER("iseq");
216
217 RUBY_MARK_UNLESS_NULL(iseq->wrapper);
218
219 if (iseq->body) {
220 const struct rb_iseq_constant_body *const body = iseq->body;
221
222 if (FL_TEST(iseq, ISEQ_MARKABLE_ISEQ)) {
223 rb_iseq_each_value(iseq, each_insn_value, NULL);
224 }
225
226 rb_gc_mark(body->variable.coverage);
227 rb_gc_mark(body->variable.pc2branchindex);
228 rb_gc_mark(body->location.label);
229 rb_gc_mark(body->location.base_label);
230 rb_gc_mark(body->location.pathobj);
231 RUBY_MARK_UNLESS_NULL((VALUE)body->parent_iseq);
232
233 if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) {
234 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
235 int i, j;
236
237 i = keyword->required_num;
238
239 for (j = 0; i < keyword->num; i++, j++) {
240 VALUE obj = keyword->default_values[j];
241 if (!SPECIAL_CONST_P(obj)) {
242 rb_gc_mark(obj);
243 }
244 }
245 }
246
247 if (body->catch_table) {
248 const struct iseq_catch_table *table = body->catch_table;
249 unsigned int i;
250 for(i = 0; i < table->size; i++) {
251 const struct iseq_catch_table_entry *entry;
252 entry = &table->entries[i];
253 if (entry->iseq) {
254 rb_gc_mark((VALUE)entry->iseq);
255 }
256 }
257 }
258 }
259
260 if (FL_TEST_RAW(iseq, ISEQ_NOT_LOADED_YET)) {
261 rb_gc_mark(iseq->aux.loader.obj);
262 }
263 else if (FL_TEST_RAW(iseq, ISEQ_USE_COMPILE_DATA)) {
264 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
265 VM_ASSERT(compile_data != NULL);
266
267 RUBY_MARK_UNLESS_NULL(compile_data->mark_ary);
268 RUBY_MARK_UNLESS_NULL(compile_data->err_info);
269 RUBY_MARK_UNLESS_NULL(compile_data->catch_table_ary);
270 }
271 else {
272 /* executable */
273 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
274 if (iseq->aux.exec.local_hooks) {
275 rb_hook_list_mark(iseq->aux.exec.local_hooks);
276 }
277 }
278
279 RUBY_MARK_LEAVE("iseq");
280 }
281
282 static size_t
param_keyword_size(const struct rb_iseq_param_keyword * pkw)283 param_keyword_size(const struct rb_iseq_param_keyword *pkw)
284 {
285 size_t size = 0;
286
287 if (!pkw) return size;
288
289 size += sizeof(struct rb_iseq_param_keyword);
290 size += sizeof(VALUE) * (pkw->num - pkw->required_num);
291
292 return size;
293 }
294
295 static size_t
iseq_memsize(const rb_iseq_t * iseq)296 iseq_memsize(const rb_iseq_t *iseq)
297 {
298 size_t size = 0; /* struct already counted as RVALUE size */
299 const struct rb_iseq_constant_body *body = iseq->body;
300 const struct iseq_compile_data *compile_data;
301
302 /* TODO: should we count original_iseq? */
303
304 if (body) {
305 struct rb_call_info_with_kwarg *ci_kw_entries = (struct rb_call_info_with_kwarg *)&body->ci_entries[body->ci_size];
306
307 size += sizeof(struct rb_iseq_constant_body);
308 size += body->iseq_size * sizeof(VALUE);
309 size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
310 size += body->local_table_size * sizeof(ID);
311 if (body->catch_table) {
312 size += iseq_catch_table_bytes(body->catch_table->size);
313 }
314 size += (body->param.opt_num + 1) * sizeof(VALUE);
315 size += param_keyword_size(body->param.keyword);
316
317 /* body->is_entries */
318 size += body->is_size * sizeof(union iseq_inline_storage_entry);
319
320 /* body->ci_entries */
321 size += body->ci_size * sizeof(struct rb_call_info);
322 size += body->ci_kw_size * sizeof(struct rb_call_info_with_kwarg);
323
324 /* body->cc_entries */
325 size += body->ci_size * sizeof(struct rb_call_cache);
326 size += body->ci_kw_size * sizeof(struct rb_call_cache);
327
328 if (ci_kw_entries) {
329 unsigned int i;
330
331 for (i = 0; i < body->ci_kw_size; i++) {
332 const struct rb_call_info_kw_arg *kw_arg = ci_kw_entries[i].kw_arg;
333
334 if (kw_arg) {
335 size += rb_call_info_kw_arg_bytes(kw_arg->keyword_len);
336 }
337 }
338 }
339 }
340
341 compile_data = ISEQ_COMPILE_DATA(iseq);
342 if (compile_data) {
343 struct iseq_compile_data_storage *cur;
344
345 size += sizeof(struct iseq_compile_data);
346
347 cur = compile_data->storage_head;
348 while (cur) {
349 size += cur->size + offsetof(struct iseq_compile_data_storage, buff);
350 cur = cur->next;
351 }
352 }
353
354 return size;
355 }
356
357 static rb_iseq_t *
iseq_alloc(void)358 iseq_alloc(void)
359 {
360 rb_iseq_t *iseq = iseq_imemo_alloc();
361 iseq->body = ZALLOC(struct rb_iseq_constant_body);
362 return iseq;
363 }
364
365 VALUE
rb_iseq_pathobj_new(VALUE path,VALUE realpath)366 rb_iseq_pathobj_new(VALUE path, VALUE realpath)
367 {
368 VALUE pathobj;
369 VM_ASSERT(RB_TYPE_P(path, T_STRING));
370 VM_ASSERT(realpath == Qnil || RB_TYPE_P(realpath, T_STRING));
371
372 if (path == realpath ||
373 (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) {
374 pathobj = rb_fstring(path);
375 }
376 else {
377 if (!NIL_P(realpath)) realpath = rb_fstring(realpath);
378 pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath);
379 rb_obj_freeze(pathobj);
380 }
381 return pathobj;
382 }
383
384 void
rb_iseq_pathobj_set(const rb_iseq_t * iseq,VALUE path,VALUE realpath)385 rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath)
386 {
387 RB_OBJ_WRITE(iseq, &iseq->body->location.pathobj,
388 rb_iseq_pathobj_new(path, realpath));
389 }
390
391 static rb_iseq_location_t *
iseq_location_setup(rb_iseq_t * iseq,VALUE name,VALUE path,VALUE realpath,VALUE first_lineno,const rb_code_location_t * code_location,const int node_id)392 iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, VALUE first_lineno, const rb_code_location_t *code_location, const int node_id)
393 {
394 rb_iseq_location_t *loc = &iseq->body->location;
395
396 rb_iseq_pathobj_set(iseq, path, realpath);
397 RB_OBJ_WRITE(iseq, &loc->label, name);
398 RB_OBJ_WRITE(iseq, &loc->base_label, name);
399 loc->first_lineno = first_lineno;
400 if (code_location) {
401 loc->node_id = node_id;
402 loc->code_location = *code_location;
403 }
404 else {
405 loc->code_location.beg_pos.lineno = 0;
406 loc->code_location.beg_pos.column = 0;
407 loc->code_location.end_pos.lineno = -1;
408 loc->code_location.end_pos.column = -1;
409 }
410
411 return loc;
412 }
413
414 static void
set_relation(rb_iseq_t * iseq,const rb_iseq_t * piseq)415 set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq)
416 {
417 struct rb_iseq_constant_body *const body = iseq->body;
418 const VALUE type = body->type;
419
420 /* set class nest stack */
421 if (type == ISEQ_TYPE_TOP) {
422 body->local_iseq = iseq;
423 }
424 else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
425 body->local_iseq = iseq;
426 }
427 else if (piseq) {
428 body->local_iseq = piseq->body->local_iseq;
429 }
430
431 if (piseq) {
432 body->parent_iseq = piseq;
433 }
434
435 if (type == ISEQ_TYPE_MAIN) {
436 body->local_iseq = iseq;
437 }
438 }
439
440 static VALUE
prepare_iseq_build(rb_iseq_t * iseq,VALUE name,VALUE path,VALUE realpath,VALUE first_lineno,const rb_code_location_t * code_location,const int node_id,const rb_iseq_t * parent,enum iseq_type type,const rb_compile_option_t * option)441 prepare_iseq_build(rb_iseq_t *iseq,
442 VALUE name, VALUE path, VALUE realpath, VALUE first_lineno, const rb_code_location_t *code_location, const int node_id,
443 const rb_iseq_t *parent, enum iseq_type type,
444 const rb_compile_option_t *option)
445 {
446 VALUE coverage = Qfalse;
447 VALUE err_info = Qnil;
448 struct rb_iseq_constant_body *const body = iseq->body;
449
450 if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP))
451 err_info = Qfalse;
452
453 body->type = type;
454 set_relation(iseq, parent);
455
456 name = rb_fstring(name);
457 iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id);
458 if (iseq != body->local_iseq) {
459 RB_OBJ_WRITE(iseq, &body->location.base_label, body->local_iseq->body->location.label);
460 }
461 ISEQ_COVERAGE_SET(iseq, Qnil);
462 ISEQ_ORIGINAL_ISEQ_CLEAR(iseq);
463 body->variable.flip_count = 0;
464
465 ISEQ_COMPILE_DATA_ALLOC(iseq);
466 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->err_info, err_info);
467 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->mark_ary, rb_ary_tmp_new(3));
468
469 ISEQ_COMPILE_DATA(iseq)->storage_head = ISEQ_COMPILE_DATA(iseq)->storage_current =
470 (struct iseq_compile_data_storage *)
471 ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
472 offsetof(struct iseq_compile_data_storage, buff));
473
474 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->catch_table_ary, rb_ary_tmp_new(3));
475 ISEQ_COMPILE_DATA(iseq)->storage_head->pos = 0;
476 ISEQ_COMPILE_DATA(iseq)->storage_head->next = 0;
477 ISEQ_COMPILE_DATA(iseq)->storage_head->size =
478 INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE;
479 ISEQ_COMPILE_DATA(iseq)->option = option;
480
481 ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL;
482
483 if (option->coverage_enabled) {
484 VALUE coverages = rb_get_coverages();
485 if (RTEST(coverages)) {
486 coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq));
487 if (NIL_P(coverage)) coverage = Qfalse;
488 }
489 }
490 ISEQ_COVERAGE_SET(iseq, coverage);
491 if (coverage && ISEQ_BRANCH_COVERAGE(iseq))
492 ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_tmp_new(0));
493
494 return Qtrue;
495 }
496
497 #if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
498 static void validate_get_insn_info(const rb_iseq_t *iseq);
499 #endif
500
501 void
rb_iseq_insns_info_encode_positions(const rb_iseq_t * iseq)502 rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq)
503 {
504 #if VM_INSN_INFO_TABLE_IMPL == 2
505 struct rb_iseq_constant_body *const body = iseq->body;
506 int size = body->insns_info.size;
507 int max_pos = body->iseq_size;
508 int *data = (int *)body->insns_info.positions;
509 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
510 body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size);
511 #if VM_CHECK_MODE == 0
512 ruby_xfree(body->insns_info.positions);
513 body->insns_info.positions = NULL;
514 #endif
515 #endif
516 }
517
#if VM_INSN_INFO_TABLE_IMPL == 2
/*
 * Return the result of succ_index_table_invert() for the body's succ index
 * table, given the instruction count as maximum position and the number of
 * instruction-info entries.
 */
unsigned int *
rb_iseq_insns_info_decode_positions(const struct rb_iseq_constant_body *body)
{
    const int entries = body->insns_info.size;
    const int max_pos = body->iseq_size;

    return succ_index_table_invert(max_pos, body->insns_info.succ_index_table, entries);
}
#endif
528
529 void
rb_iseq_init_trace(rb_iseq_t * iseq)530 rb_iseq_init_trace(rb_iseq_t *iseq)
531 {
532 iseq->aux.exec.global_trace_events = 0;
533 if (ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS) {
534 rb_iseq_trace_set(iseq, ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS);
535 }
536 }
537
538 static VALUE
finish_iseq_build(rb_iseq_t * iseq)539 finish_iseq_build(rb_iseq_t *iseq)
540 {
541 struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq);
542 const struct rb_iseq_constant_body *const body = iseq->body;
543 VALUE err = data->err_info;
544 ISEQ_COMPILE_DATA_CLEAR(iseq);
545 compile_data_free(data);
546
547 #if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
548 /* create succ_index_table */
549 if (body->insns_info.succ_index_table == NULL) {
550 rb_iseq_insns_info_encode_positions(iseq);
551 }
552 #endif
553
554 #if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
555 validate_get_insn_info(iseq);
556 #endif
557
558 if (RTEST(err)) {
559 VALUE path = pathobj_path(body->location.pathobj);
560 if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error");
561 rb_funcallv(err, rb_intern("set_backtrace"), 1, &path);
562 rb_exc_raise(err);
563 }
564
565 rb_iseq_init_trace(iseq);
566 return Qtrue;
567 }
568
569 static rb_compile_option_t COMPILE_OPTION_DEFAULT = {
570 OPT_INLINE_CONST_CACHE, /* int inline_const_cache; */
571 OPT_PEEPHOLE_OPTIMIZATION, /* int peephole_optimization; */
572 OPT_TAILCALL_OPTIMIZATION, /* int tailcall_optimization */
573 OPT_SPECIALISED_INSTRUCTION, /* int specialized_instruction; */
574 OPT_OPERANDS_UNIFICATION, /* int operands_unification; */
575 OPT_INSTRUCTIONS_UNIFICATION, /* int instructions_unification; */
576 OPT_STACK_CACHING, /* int stack_caching; */
577 OPT_FROZEN_STRING_LITERAL,
578 OPT_DEBUG_FROZEN_STRING_LITERAL,
579 TRUE, /* coverage_enabled */
580 };
581
582 static const rb_compile_option_t COMPILE_OPTION_FALSE = {0};
583
584 static void
set_compile_option_from_hash(rb_compile_option_t * option,VALUE opt)585 set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt)
586 {
587 #define SET_COMPILE_OPTION(o, h, mem) \
588 { VALUE flag = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
589 if (flag == Qtrue) { (o)->mem = 1; } \
590 else if (flag == Qfalse) { (o)->mem = 0; } \
591 }
592 #define SET_COMPILE_OPTION_NUM(o, h, mem) \
593 { VALUE num = rb_hash_aref(opt, ID2SYM(rb_intern(#mem))); \
594 if (!NIL_P(num)) (o)->mem = NUM2INT(num); \
595 }
596 SET_COMPILE_OPTION(option, opt, inline_const_cache);
597 SET_COMPILE_OPTION(option, opt, peephole_optimization);
598 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
599 SET_COMPILE_OPTION(option, opt, specialized_instruction);
600 SET_COMPILE_OPTION(option, opt, operands_unification);
601 SET_COMPILE_OPTION(option, opt, instructions_unification);
602 SET_COMPILE_OPTION(option, opt, stack_caching);
603 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
604 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
605 SET_COMPILE_OPTION(option, opt, coverage_enabled);
606 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
607 #undef SET_COMPILE_OPTION
608 #undef SET_COMPILE_OPTION_NUM
609 }
610
611 void
rb_iseq_make_compile_option(rb_compile_option_t * option,VALUE opt)612 rb_iseq_make_compile_option(rb_compile_option_t *option, VALUE opt)
613 {
614 Check_Type(opt, T_HASH);
615 set_compile_option_from_hash(option, opt);
616 }
617
618 static void
make_compile_option(rb_compile_option_t * option,VALUE opt)619 make_compile_option(rb_compile_option_t *option, VALUE opt)
620 {
621 if (opt == Qnil) {
622 *option = COMPILE_OPTION_DEFAULT;
623 }
624 else if (opt == Qfalse) {
625 *option = COMPILE_OPTION_FALSE;
626 }
627 else if (opt == Qtrue) {
628 int i;
629 for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i)
630 ((int *)option)[i] = 1;
631 }
632 else if (RB_TYPE_P(opt, T_HASH)) {
633 *option = COMPILE_OPTION_DEFAULT;
634 set_compile_option_from_hash(option, opt);
635 }
636 else {
637 rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
638 }
639 }
640
641 static VALUE
make_compile_option_value(rb_compile_option_t * option)642 make_compile_option_value(rb_compile_option_t *option)
643 {
644 VALUE opt = rb_hash_new();
645 #define SET_COMPILE_OPTION(o, h, mem) \
646 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), (o)->mem ? Qtrue : Qfalse)
647 #define SET_COMPILE_OPTION_NUM(o, h, mem) \
648 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem))
649 {
650 SET_COMPILE_OPTION(option, opt, inline_const_cache);
651 SET_COMPILE_OPTION(option, opt, peephole_optimization);
652 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
653 SET_COMPILE_OPTION(option, opt, specialized_instruction);
654 SET_COMPILE_OPTION(option, opt, operands_unification);
655 SET_COMPILE_OPTION(option, opt, instructions_unification);
656 SET_COMPILE_OPTION(option, opt, stack_caching);
657 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
658 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
659 SET_COMPILE_OPTION(option, opt, coverage_enabled);
660 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
661 }
662 #undef SET_COMPILE_OPTION
663 #undef SET_COMPILE_OPTION_NUM
664 return opt;
665 }
666
667 rb_iseq_t *
rb_iseq_new(const rb_ast_body_t * ast,VALUE name,VALUE path,VALUE realpath,const rb_iseq_t * parent,enum iseq_type type)668 rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
669 const rb_iseq_t *parent, enum iseq_type type)
670 {
671 return rb_iseq_new_with_opt(ast, name, path, realpath, INT2FIX(0), parent, type,
672 &COMPILE_OPTION_DEFAULT);
673 }
674
675 rb_iseq_t *
rb_iseq_new_top(const rb_ast_body_t * ast,VALUE name,VALUE path,VALUE realpath,const rb_iseq_t * parent)676 rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent)
677 {
678 VALUE coverages = rb_get_coverages();
679 if (RTEST(coverages)) {
680 if (ast->line_count >= 0) {
681 int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : ast->line_count;
682 VALUE coverage = rb_default_coverage(len);
683 rb_hash_aset(coverages, path, coverage);
684 }
685 }
686
687 return rb_iseq_new_with_opt(ast, name, path, realpath, INT2FIX(0), parent, ISEQ_TYPE_TOP,
688 &COMPILE_OPTION_DEFAULT);
689 }
690
691 rb_iseq_t *
rb_iseq_new_main(const rb_ast_body_t * ast,VALUE path,VALUE realpath,const rb_iseq_t * parent)692 rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_iseq_t *parent)
693 {
694 return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"),
695 path, realpath, INT2FIX(0),
696 parent, ISEQ_TYPE_MAIN, &COMPILE_OPTION_DEFAULT);
697 }
698
699 static inline rb_iseq_t *
iseq_translate(rb_iseq_t * iseq)700 iseq_translate(rb_iseq_t *iseq)
701 {
702 if (rb_respond_to(rb_cISeq, rb_intern("translate"))) {
703 VALUE v1 = iseqw_new(iseq);
704 VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1);
705 if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) {
706 iseq = (rb_iseq_t *)iseqw_check(v2);
707 }
708 }
709
710 return iseq;
711 }
712
713 rb_iseq_t *
rb_iseq_new_with_opt(const rb_ast_body_t * ast,VALUE name,VALUE path,VALUE realpath,VALUE first_lineno,const rb_iseq_t * parent,enum iseq_type type,const rb_compile_option_t * option)714 rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
715 VALUE first_lineno, const rb_iseq_t *parent,
716 enum iseq_type type, const rb_compile_option_t *option)
717 {
718 const NODE *node = ast ? ast->root : 0;
719 /* TODO: argument check */
720 rb_iseq_t *iseq = iseq_alloc();
721 rb_compile_option_t new_opt;
722
723 new_opt = option ? *option : COMPILE_OPTION_DEFAULT;
724 if (ast && ast->compile_option) rb_iseq_make_compile_option(&new_opt, ast->compile_option);
725
726 prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, node ? nd_node_id(node) : -1, parent, type, &new_opt);
727
728 rb_iseq_compile_node(iseq, node);
729 finish_iseq_build(iseq);
730
731 return iseq_translate(iseq);
732 }
733
734 rb_iseq_t *
rb_iseq_new_ifunc(const struct vm_ifunc * ifunc,VALUE name,VALUE path,VALUE realpath,VALUE first_lineno,const rb_iseq_t * parent,enum iseq_type type,const rb_compile_option_t * option)735 rb_iseq_new_ifunc(const struct vm_ifunc *ifunc, VALUE name, VALUE path, VALUE realpath,
736 VALUE first_lineno, const rb_iseq_t *parent,
737 enum iseq_type type, const rb_compile_option_t *option)
738 {
739 /* TODO: argument check */
740 rb_iseq_t *iseq = iseq_alloc();
741
742 if (!option) option = &COMPILE_OPTION_DEFAULT;
743 prepare_iseq_build(iseq, name, path, realpath, first_lineno, NULL, -1, parent, type, option);
744
745 rb_iseq_compile_ifunc(iseq, ifunc);
746 finish_iseq_build(iseq);
747
748 return iseq_translate(iseq);
749 }
750
751 const rb_iseq_t *
rb_iseq_load_iseq(VALUE fname)752 rb_iseq_load_iseq(VALUE fname)
753 {
754 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname);
755
756 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
757 return iseqw_check(iseqv);
758 }
759
760 return NULL;
761 }
762
763 #define CHECK_ARRAY(v) rb_to_array_type(v)
764 #define CHECK_HASH(v) rb_to_hash_type(v)
765 #define CHECK_STRING(v) rb_str_to_str(v)
766 #define CHECK_SYMBOL(v) rb_to_symbol_type(v)
CHECK_INTEGER(VALUE v)767 static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;}
768
769 static enum iseq_type
iseq_type_from_sym(VALUE type)770 iseq_type_from_sym(VALUE type)
771 {
772 const ID id_top = rb_intern("top");
773 const ID id_method = rb_intern("method");
774 const ID id_block = rb_intern("block");
775 const ID id_class = rb_intern("class");
776 const ID id_rescue = rb_intern("rescue");
777 const ID id_ensure = rb_intern("ensure");
778 const ID id_eval = rb_intern("eval");
779 const ID id_main = rb_intern("main");
780 const ID id_plain = rb_intern("plain");
781 /* ensure all symbols are static or pinned down before
782 * conversion */
783 const ID typeid = rb_check_id(&type);
784 if (typeid == id_top) return ISEQ_TYPE_TOP;
785 if (typeid == id_method) return ISEQ_TYPE_METHOD;
786 if (typeid == id_block) return ISEQ_TYPE_BLOCK;
787 if (typeid == id_class) return ISEQ_TYPE_CLASS;
788 if (typeid == id_rescue) return ISEQ_TYPE_RESCUE;
789 if (typeid == id_ensure) return ISEQ_TYPE_ENSURE;
790 if (typeid == id_eval) return ISEQ_TYPE_EVAL;
791 if (typeid == id_main) return ISEQ_TYPE_MAIN;
792 if (typeid == id_plain) return ISEQ_TYPE_PLAIN;
793 return (enum iseq_type)-1;
794 }
795
796 static VALUE
iseq_load(VALUE data,const rb_iseq_t * parent,VALUE opt)797 iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt)
798 {
799 rb_iseq_t *iseq = iseq_alloc();
800
801 VALUE magic, version1, version2, format_type, misc;
802 VALUE name, path, realpath, first_lineno, code_location, node_id;
803 VALUE type, body, locals, params, exception;
804
805 st_data_t iseq_type;
806 rb_compile_option_t option;
807 int i = 0;
808 rb_code_location_t tmp_loc = { {0, 0}, {-1, -1} };
809
810 /* [magic, major_version, minor_version, format_type, misc,
811 * label, path, first_lineno,
812 * type, locals, args, exception_table, body]
813 */
814
815 data = CHECK_ARRAY(data);
816
817 magic = CHECK_STRING(rb_ary_entry(data, i++));
818 version1 = CHECK_INTEGER(rb_ary_entry(data, i++));
819 version2 = CHECK_INTEGER(rb_ary_entry(data, i++));
820 format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
821 misc = CHECK_HASH(rb_ary_entry(data, i++));
822 ((void)magic, (void)version1, (void)version2, (void)format_type);
823
824 name = CHECK_STRING(rb_ary_entry(data, i++));
825 path = CHECK_STRING(rb_ary_entry(data, i++));
826 realpath = rb_ary_entry(data, i++);
827 realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath);
828 first_lineno = CHECK_INTEGER(rb_ary_entry(data, i++));
829
830 type = CHECK_SYMBOL(rb_ary_entry(data, i++));
831 locals = CHECK_ARRAY(rb_ary_entry(data, i++));
832 params = CHECK_HASH(rb_ary_entry(data, i++));
833 exception = CHECK_ARRAY(rb_ary_entry(data, i++));
834 body = CHECK_ARRAY(rb_ary_entry(data, i++));
835
836 iseq->body->local_iseq = iseq;
837
838 iseq_type = iseq_type_from_sym(type);
839 if (iseq_type == (enum iseq_type)-1) {
840 rb_raise(rb_eTypeError, "unsupport type: :%"PRIsVALUE, rb_sym2str(type));
841 }
842
843 node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id")));
844
845 code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location")));
846 if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) {
847 tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0));
848 tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1));
849 tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2));
850 tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3));
851 }
852
853 make_compile_option(&option, opt);
854 option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */
855 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id),
856 parent, (enum iseq_type)iseq_type, &option);
857
858 rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
859
860 finish_iseq_build(iseq);
861
862 return iseqw_new(iseq);
863 }
864
865 /*
866 * :nodoc:
867 */
868 static VALUE
iseq_s_load(int argc,VALUE * argv,VALUE self)869 iseq_s_load(int argc, VALUE *argv, VALUE self)
870 {
871 VALUE data, opt=Qnil;
872 rb_scan_args(argc, argv, "11", &data, &opt);
873 return iseq_load(data, NULL, opt);
874 }
875
876 VALUE
rb_iseq_load(VALUE data,VALUE parent,VALUE opt)877 rb_iseq_load(VALUE data, VALUE parent, VALUE opt)
878 {
879 return iseq_load(data, RTEST(parent) ? (rb_iseq_t *)parent : NULL, opt);
880 }
881
882 rb_iseq_t *
rb_iseq_compile_with_option(VALUE src,VALUE file,VALUE realpath,VALUE line,const struct rb_block * base_block,VALUE opt)883 rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, const struct rb_block *base_block, VALUE opt)
884 {
885 rb_iseq_t *iseq = NULL;
886 const rb_iseq_t *const parent = base_block ? vm_block_iseq(base_block) : NULL;
887 rb_compile_option_t option;
888 const enum iseq_type type = parent ? ISEQ_TYPE_EVAL : ISEQ_TYPE_TOP;
889 #if !defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 8)
890 # define INITIALIZED volatile /* suppress warnings by gcc 4.8 */
891 #else
892 # define INITIALIZED /* volatile */
893 #endif
894 rb_ast_t *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
895 int ln;
896 rb_ast_t *INITIALIZED ast;
897
898 /* safe results first */
899 make_compile_option(&option, opt);
900 ln = NUM2INT(line);
901 StringValueCStr(file);
902 if (RB_TYPE_P(src, T_FILE)) {
903 parse = rb_parser_compile_file_path;
904 }
905 else {
906 parse = rb_parser_compile_string_path;
907 StringValue(src);
908 }
909 {
910 const VALUE parser = rb_parser_new();
911 rb_parser_set_context(parser, base_block, FALSE);
912 ast = (*parse)(parser, file, src, ln);
913 }
914
915 if (!ast->body.root) {
916 rb_ast_dispose(ast);
917 rb_exc_raise(GET_EC()->errinfo);
918 }
919 else {
920 INITIALIZED VALUE label = parent ?
921 parent->body->location.label :
922 rb_fstring_lit("<compiled>");
923 iseq = rb_iseq_new_with_opt(&ast->body, label, file, realpath, line,
924 parent, type, &option);
925 rb_ast_dispose(ast);
926 }
927
928 return iseq;
929 }
930
931 rb_iseq_t *
rb_iseq_compile(VALUE src,VALUE file,VALUE line)932 rb_iseq_compile(VALUE src, VALUE file, VALUE line)
933 {
934 return rb_iseq_compile_with_option(src, file, Qnil, line, 0, Qnil);
935 }
936
937 rb_iseq_t *
rb_iseq_compile_on_base(VALUE src,VALUE file,VALUE line,const struct rb_block * base_block)938 rb_iseq_compile_on_base(VALUE src, VALUE file, VALUE line, const struct rb_block *base_block)
939 {
940 return rb_iseq_compile_with_option(src, file, Qnil, line, base_block, Qnil);
941 }
942
943 VALUE
rb_iseq_path(const rb_iseq_t * iseq)944 rb_iseq_path(const rb_iseq_t *iseq)
945 {
946 return pathobj_path(iseq->body->location.pathobj);
947 }
948
949 VALUE
rb_iseq_realpath(const rb_iseq_t * iseq)950 rb_iseq_realpath(const rb_iseq_t *iseq)
951 {
952 return pathobj_realpath(iseq->body->location.pathobj);
953 }
954
955 VALUE
rb_iseq_absolute_path(const rb_iseq_t * iseq)956 rb_iseq_absolute_path(const rb_iseq_t *iseq)
957 {
958 return rb_iseq_realpath(iseq);
959 }
960
961 VALUE
rb_iseq_label(const rb_iseq_t * iseq)962 rb_iseq_label(const rb_iseq_t *iseq)
963 {
964 return iseq->body->location.label;
965 }
966
967 VALUE
rb_iseq_base_label(const rb_iseq_t * iseq)968 rb_iseq_base_label(const rb_iseq_t *iseq)
969 {
970 return iseq->body->location.base_label;
971 }
972
973 VALUE
rb_iseq_first_lineno(const rb_iseq_t * iseq)974 rb_iseq_first_lineno(const rb_iseq_t *iseq)
975 {
976 return iseq->body->location.first_lineno;
977 }
978
979 VALUE
rb_iseq_method_name(const rb_iseq_t * iseq)980 rb_iseq_method_name(const rb_iseq_t *iseq)
981 {
982 struct rb_iseq_constant_body *const body = iseq->body->local_iseq->body;
983
984 if (body->type == ISEQ_TYPE_METHOD) {
985 return body->location.base_label;
986 }
987 else {
988 return Qnil;
989 }
990 }
991
992 void
rb_iseq_code_location(const rb_iseq_t * iseq,int * beg_pos_lineno,int * beg_pos_column,int * end_pos_lineno,int * end_pos_column)993 rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column)
994 {
995 const rb_code_location_t *loc = &iseq->body->location.code_location;
996 if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno;
997 if (beg_pos_column) *beg_pos_column = loc->beg_pos.column;
998 if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno;
999 if (end_pos_column) *end_pos_column = loc->end_pos.column;
1000 }
1001
1002 VALUE
rb_iseq_coverage(const rb_iseq_t * iseq)1003 rb_iseq_coverage(const rb_iseq_t *iseq)
1004 {
1005 return ISEQ_COVERAGE(iseq);
1006 }
1007
1008 static int
remove_coverage_i(void * vstart,void * vend,size_t stride,void * data)1009 remove_coverage_i(void *vstart, void *vend, size_t stride, void *data)
1010 {
1011 VALUE v = (VALUE)vstart;
1012 for (; v != (VALUE)vend; v += stride) {
1013 if (rb_obj_is_iseq(v)) {
1014 rb_iseq_t *iseq = (rb_iseq_t *)v;
1015 ISEQ_COVERAGE_SET(iseq, Qnil);
1016 }
1017 }
1018 return 0;
1019 }
1020
1021 void
rb_iseq_remove_coverage_all(void)1022 rb_iseq_remove_coverage_all(void)
1023 {
1024 rb_objspace_each_objects(remove_coverage_i, NULL);
1025 }
1026
1027 /* define wrapper class methods (RubyVM::InstructionSequence) */
1028
1029 static void
iseqw_mark(void * ptr)1030 iseqw_mark(void *ptr)
1031 {
1032 rb_gc_mark((VALUE)ptr);
1033 }
1034
1035 static size_t
iseqw_memsize(const void * ptr)1036 iseqw_memsize(const void *ptr)
1037 {
1038 return iseq_memsize((const rb_iseq_t *)ptr);
1039 }
1040
/*
 * Typed-data descriptor passed to TypedData_Wrap_Struct() by iseqw_new()
 * when an iseq is wrapped in an rb_cISeq object.
 */
static const rb_data_type_t iseqw_data_type = {
    /* name string of this data type */
    "T_IMEMO/iseq",
    /* function table: iseqw_mark, NULL, iseqw_memsize.
     * NOTE(review): presumably the mark / free / size slots, i.e. no free
     * function is registered -- confirm against rb_data_type_t. */
    {iseqw_mark, NULL, iseqw_memsize,},
    /* two zeroed slots followed by the type flags */
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
};
1046
1047 static VALUE
iseqw_new(const rb_iseq_t * iseq)1048 iseqw_new(const rb_iseq_t *iseq)
1049 {
1050 if (iseq->wrapper) {
1051 return iseq->wrapper;
1052 }
1053 else {
1054 union { const rb_iseq_t *in; void *out; } deconst;
1055 VALUE obj;
1056 deconst.in = iseq;
1057 obj = TypedData_Wrap_Struct(rb_cISeq, &iseqw_data_type, deconst.out);
1058 RB_OBJ_WRITTEN(obj, Qundef, iseq);
1059
1060 /* cache a wrapper object */
1061 RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj);
1062 RB_OBJ_FREEZE((VALUE)iseq);
1063
1064 return obj;
1065 }
1066 }
1067
1068 VALUE
rb_iseqw_new(const rb_iseq_t * iseq)1069 rb_iseqw_new(const rb_iseq_t *iseq)
1070 {
1071 return iseqw_new(iseq);
1072 }
1073
1074 /*
1075 * call-seq:
1076 * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq
1077 * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq
1078 *
1079 * Takes +source+, a String of Ruby code and compiles it to an
1080 * InstructionSequence.
1081 *
1082 * Optionally takes +file+, +path+, and +line+ which describe the filename,
1083 * absolute path and first line number of the ruby code in +source+ which are
1084 * metadata attached to the returned +iseq+.
1085 *
1086 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1087 * modify the default behavior of the Ruby iseq compiler.
1088 *
1089 * For details regarding valid compile options see ::compile_option=.
1090 *
1091 * RubyVM::InstructionSequence.compile("a = 1 + 2")
 *     #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1093 *
1094 */
1095 static VALUE
iseqw_s_compile(int argc,VALUE * argv,VALUE self)1096 iseqw_s_compile(int argc, VALUE *argv, VALUE self)
1097 {
1098 VALUE src, file = Qnil, path = Qnil, line = INT2FIX(1), opt = Qnil;
1099 int i;
1100
1101 rb_secure(1);
1102
1103 i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
1104 if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
1105 switch (i) {
1106 case 5: opt = argv[--i];
1107 case 4: line = argv[--i];
1108 case 3: path = argv[--i];
1109 case 2: file = argv[--i];
1110 }
1111
1112 if (NIL_P(file)) file = rb_fstring_lit("<compiled>");
1113 if (NIL_P(path)) path = file;
1114 if (NIL_P(line)) line = INT2FIX(1);
1115
1116 Check_Type(path, T_STRING);
1117 Check_Type(file, T_STRING);
1118
1119 return iseqw_new(rb_iseq_compile_with_option(src, file, path, line, 0, opt));
1120 }
1121
1122 /*
1123 * call-seq:
1124 * InstructionSequence.compile_file(file[, options]) -> iseq
1125 *
1126 * Takes +file+, a String with the location of a Ruby source file, reads,
1127 * parses and compiles the file, and returns +iseq+, the compiled
1128 * InstructionSequence with source location metadata set.
1129 *
1130 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1131 * modify the default behavior of the Ruby iseq compiler.
1132 *
1133 * For details regarding valid compile options see ::compile_option=.
1134 *
1135 * # /tmp/hello.rb
1136 * puts "Hello, world!"
1137 *
1138 * # elsewhere
1139 * RubyVM::InstructionSequence.compile_file("/tmp/hello.rb")
 *     #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb:1>
1141 */
1142 static VALUE
iseqw_s_compile_file(int argc,VALUE * argv,VALUE self)1143 iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
1144 {
1145 VALUE file, line = INT2FIX(1), opt = Qnil;
1146 VALUE parser, f, exc = Qnil, ret;
1147 rb_ast_t *ast;
1148 rb_compile_option_t option;
1149 int i;
1150
1151 rb_secure(1);
1152 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1153 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1154 switch (i) {
1155 case 2: opt = argv[--i];
1156 }
1157 FilePathValue(file);
1158 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1159
1160 f = rb_file_open_str(file, "r");
1161
1162 parser = rb_parser_new();
1163 rb_parser_set_context(parser, NULL, FALSE);
1164 ast = rb_parser_compile_file_path(parser, file, f, NUM2INT(line));
1165 if (!ast->body.root) exc = GET_EC()->errinfo;
1166
1167 rb_io_close(f);
1168 if (!ast->body.root) {
1169 rb_ast_dispose(ast);
1170 rb_exc_raise(exc);
1171 }
1172
1173 make_compile_option(&option, opt);
1174
1175 ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"),
1176 file,
1177 rb_realpath_internal(Qnil, file, 1),
1178 line, NULL, ISEQ_TYPE_TOP, &option));
1179 rb_ast_dispose(ast);
1180 return ret;
1181 }
1182
1183 /*
1184 * call-seq:
1185 * InstructionSequence.compile_option = options
1186 *
1187 * Sets the default values for various optimizations in the Ruby iseq
1188 * compiler.
1189 *
1190 * Possible values for +options+ include +true+, which enables all options,
1191 * +false+ which disables all options, and +nil+ which leaves all options
1192 * unchanged.
1193 *
1194 * You can also pass a +Hash+ of +options+ that you want to change, any
1195 * options not present in the hash will be left unchanged.
1196 *
1197 * Possible option names (which are keys in +options+) which can be set to
1198 * +true+ or +false+ include:
1199 *
1200 * * +:inline_const_cache+
1201 * * +:instructions_unification+
1202 * * +:operands_unification+
1203 * * +:peephole_optimization+
1204 * * +:specialized_instruction+
1205 * * +:stack_caching+
1206 * * +:tailcall_optimization+
1207 *
1208 * Additionally, +:debug_level+ can be set to an integer.
1209 *
1210 * These default options can be overwritten for a single run of the iseq
1211 * compiler by passing any of the above values as the +options+ parameter to
1212 * ::new, ::compile and ::compile_file.
1213 */
1214 static VALUE
iseqw_s_compile_option_set(VALUE self,VALUE opt)1215 iseqw_s_compile_option_set(VALUE self, VALUE opt)
1216 {
1217 rb_compile_option_t option;
1218 rb_secure(1);
1219 make_compile_option(&option, opt);
1220 COMPILE_OPTION_DEFAULT = option;
1221 return opt;
1222 }
1223
1224 /*
1225 * call-seq:
1226 * InstructionSequence.compile_option -> options
1227 *
1228 * Returns a hash of default options used by the Ruby iseq compiler.
1229 *
1230 * For details, see InstructionSequence.compile_option=.
1231 */
1232 static VALUE
iseqw_s_compile_option_get(VALUE self)1233 iseqw_s_compile_option_get(VALUE self)
1234 {
1235 return make_compile_option_value(&COMPILE_OPTION_DEFAULT);
1236 }
1237
1238 static const rb_iseq_t *
iseqw_check(VALUE iseqw)1239 iseqw_check(VALUE iseqw)
1240 {
1241 rb_iseq_t *iseq = DATA_PTR(iseqw);
1242
1243 if (!iseq->body) {
1244 rb_ibf_load_iseq_complete(iseq);
1245 }
1246
1247 if (!iseq->body->location.label) {
1248 rb_raise(rb_eTypeError, "uninitialized InstructionSequence");
1249 }
1250 return iseq;
1251 }
1252
1253 const rb_iseq_t *
rb_iseqw_to_iseq(VALUE iseqw)1254 rb_iseqw_to_iseq(VALUE iseqw)
1255 {
1256 return iseqw_check(iseqw);
1257 }
1258
1259 /*
1260 * call-seq:
1261 * iseq.eval -> obj
1262 *
1263 * Evaluates the instruction sequence and returns the result.
1264 *
1265 * RubyVM::InstructionSequence.compile("1 + 2").eval #=> 3
1266 */
1267 static VALUE
iseqw_eval(VALUE self)1268 iseqw_eval(VALUE self)
1269 {
1270 rb_secure(1);
1271 return rb_iseq_eval(iseqw_check(self));
1272 }
1273
1274 /*
1275 * Returns a human-readable string representation of this instruction
1276 * sequence, including the #label and #path.
1277 */
1278 static VALUE
iseqw_inspect(VALUE self)1279 iseqw_inspect(VALUE self)
1280 {
1281 const rb_iseq_t *iseq = iseqw_check(self);
1282 const struct rb_iseq_constant_body *const body = iseq->body;
1283 VALUE klass = rb_class_name(rb_obj_class(self));
1284
1285 if (!body->location.label) {
1286 return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass);
1287 }
1288 else {
1289 return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>",
1290 klass,
1291 body->location.label, rb_iseq_path(iseq),
1292 FIX2INT(rb_iseq_first_lineno(iseq)));
1293 }
1294 }
1295
1296 /*
1297 * Returns the path of this instruction sequence.
1298 *
1299 * <code><compiled></code> if the iseq was evaluated from a string.
1300 *
1301 * For example, using irb:
1302 *
1303 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
 *     #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1305 * iseq.path
1306 * #=> "<compiled>"
1307 *
1308 * Using ::compile_file:
1309 *
1310 * # /tmp/method.rb
1311 * def hello
1312 * puts "hello, world"
1313 * end
1314 *
1315 * # in irb
1316 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1317 * > iseq.path #=> /tmp/method.rb
1318 */
1319 static VALUE
iseqw_path(VALUE self)1320 iseqw_path(VALUE self)
1321 {
1322 return rb_iseq_path(iseqw_check(self));
1323 }
1324
1325 /*
1326 * Returns the absolute path of this instruction sequence.
1327 *
1328 * +nil+ if the iseq was evaluated from a string.
1329 *
1330 * For example, using ::compile_file:
1331 *
1332 * # /tmp/method.rb
1333 * def hello
1334 * puts "hello, world"
1335 * end
1336 *
1337 * # in irb
1338 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1339 * > iseq.absolute_path #=> /tmp/method.rb
1340 */
1341 static VALUE
iseqw_absolute_path(VALUE self)1342 iseqw_absolute_path(VALUE self)
1343 {
1344 return rb_iseq_realpath(iseqw_check(self));
1345 }
1346
1347 /* Returns the label of this instruction sequence.
1348 *
1349 * <code><main></code> if it's at the top level, <code><compiled></code> if it
1350 * was evaluated from a string.
1351 *
1352 * For example, using irb:
1353 *
1354 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
 *     #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1356 * iseq.label
1357 * #=> "<compiled>"
1358 *
1359 * Using ::compile_file:
1360 *
1361 * # /tmp/method.rb
1362 * def hello
1363 * puts "hello, world"
1364 * end
1365 *
1366 * # in irb
1367 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1368 * > iseq.label #=> <main>
1369 */
1370 static VALUE
iseqw_label(VALUE self)1371 iseqw_label(VALUE self)
1372 {
1373 return rb_iseq_label(iseqw_check(self));
1374 }
1375
1376 /* Returns the base label of this instruction sequence.
1377 *
1378 * For example, using irb:
1379 *
1380 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
 *     #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1382 * iseq.base_label
1383 * #=> "<compiled>"
1384 *
1385 * Using ::compile_file:
1386 *
1387 * # /tmp/method.rb
1388 * def hello
1389 * puts "hello, world"
1390 * end
1391 *
1392 * # in irb
1393 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1394 * > iseq.base_label #=> <main>
1395 */
1396 static VALUE
iseqw_base_label(VALUE self)1397 iseqw_base_label(VALUE self)
1398 {
1399 return rb_iseq_base_label(iseqw_check(self));
1400 }
1401
1402 /* Returns the number of the first source line where the instruction sequence
1403 * was loaded from.
1404 *
1405 * For example, using irb:
1406 *
1407 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
 *     #=> <RubyVM::InstructionSequence:<compiled>@<compiled>:1>
1409 * iseq.first_lineno
1410 * #=> 1
1411 */
1412 static VALUE
iseqw_first_lineno(VALUE self)1413 iseqw_first_lineno(VALUE self)
1414 {
1415 return rb_iseq_first_lineno(iseqw_check(self));
1416 }
1417
1418 static VALUE iseq_data_to_ary(const rb_iseq_t *iseq);
1419
1420 /*
1421 * call-seq:
1422 * iseq.to_a -> ary
1423 *
1424 * Returns an Array with 14 elements representing the instruction sequence
1425 * with the following data:
1426 *
1427 * [magic]
1428 * A string identifying the data format. <b>Always
1429 * +YARVInstructionSequence/SimpleDataFormat+.</b>
1430 *
1431 * [major_version]
1432 * The major version of the instruction sequence.
1433 *
1434 * [minor_version]
1435 * The minor version of the instruction sequence.
1436 *
1437 * [format_type]
1438 * A number identifying the data format. <b>Always 1</b>.
1439 *
1440 * [misc]
1441 * A hash containing:
1442 *
1443 * [+:arg_size+]
1444 * the total number of arguments taken by the method or the block (0 if
1445 * _iseq_ doesn't represent a method or block)
1446 * [+:local_size+]
1447 * the number of local variables + 1
1448 * [+:stack_max+]
1449 * used in calculating the stack depth at which a SystemStackError is
1450 * thrown.
1451 *
1452 * [#label]
1453 * The name of the context (block, method, class, module, etc.) that this
1454 * instruction sequence belongs to.
1455 *
1456 * <code><main></code> if it's at the top level, <code><compiled></code> if
1457 * it was evaluated from a string.
1458 *
1459 * [#path]
1460 * The relative path to the Ruby file where the instruction sequence was
1461 * loaded from.
1462 *
1463 * <code><compiled></code> if the iseq was evaluated from a string.
1464 *
1465 * [#absolute_path]
1466 * The absolute path to the Ruby file where the instruction sequence was
1467 * loaded from.
1468 *
1469 * +nil+ if the iseq was evaluated from a string.
1470 *
1471 * [#first_lineno]
1472 * The number of the first source line where the instruction sequence was
1473 * loaded from.
1474 *
1475 * [type]
1476 * The type of the instruction sequence.
1477 *
 *    Valid values are +:top+, +:method+, +:block+, +:class+, +:rescue+,
 *    +:ensure+, +:eval+, +:main+, and +:plain+.
1480 *
1481 * [locals]
1482 * An array containing the names of all arguments and local variables as
1483 * symbols.
1484 *
1485 * [params]
 *    A Hash object containing parameter information.
1487 *
1488 * More info about these values can be found in +vm_core.h+.
1489 *
1490 * [catch_table]
1491 * A list of exceptions and control flow operators (rescue, next, redo,
1492 * break, etc.).
1493 *
1494 * [bytecode]
1495 * An array of arrays containing the instruction names and operands that
1496 * make up the body of the instruction sequence.
1497 *
1498 * Note that this format is MRI specific and version dependent.
1499 *
1500 */
1501 static VALUE
iseqw_to_a(VALUE self)1502 iseqw_to_a(VALUE self)
1503 {
1504 const rb_iseq_t *iseq = iseqw_check(self);
1505 rb_secure(1);
1506 return iseq_data_to_ary(iseq);
1507 }
1508
1509 #if VM_INSN_INFO_TABLE_IMPL == 1 /* binary search */
1510 static const struct iseq_insn_info_entry *
get_insn_info_binary_search(const rb_iseq_t * iseq,size_t pos)1511 get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos)
1512 {
1513 const struct rb_iseq_constant_body *const body = iseq->body;
1514 size_t size = body->insns_info.size;
1515 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
1516 const unsigned int *positions = body->insns_info.positions;
1517 const int debug = 0;
1518
1519 if (debug) {
1520 printf("size: %"PRIuSIZE"\n", size);
1521 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1522 (size_t)0, positions[0], insns_info[0].line_no, pos);
1523 }
1524
1525 if (size == 0) {
1526 return NULL;
1527 }
1528 else if (size == 1) {
1529 return &insns_info[0];
1530 }
1531 else {
1532 size_t l = 1, r = size - 1;
1533 while (l <= r) {
1534 size_t m = l + (r - l) / 2;
1535 if (positions[m] == pos) {
1536 return &insns_info[m];
1537 }
1538 if (positions[m] < pos) {
1539 l = m + 1;
1540 }
1541 else {
1542 r = m - 1;
1543 }
1544 }
1545 if (l >= size) {
1546 return &insns_info[size-1];
1547 }
1548 if (positions[l] > pos) {
1549 return &insns_info[l-1];
1550 }
1551 return &insns_info[l];
1552 }
1553 }
1554
1555 static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t * iseq,size_t pos)1556 get_insn_info(const rb_iseq_t *iseq, size_t pos)
1557 {
1558 return get_insn_info_binary_search(iseq, pos);
1559 }
1560 #endif
1561
1562 #if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
1563 static const struct iseq_insn_info_entry *
get_insn_info_succinct_bitvector(const rb_iseq_t * iseq,size_t pos)1564 get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos)
1565 {
1566 const struct rb_iseq_constant_body *const body = iseq->body;
1567 size_t size = body->insns_info.size;
1568 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
1569 const int debug = 0;
1570
1571 if (debug) {
1572 #if VM_CHECK_MODE > 0
1573 const unsigned int *positions = body->insns_info.positions;
1574 printf("size: %"PRIuSIZE"\n", size);
1575 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1576 (size_t)0, positions[0], insns_info[0].line_no, pos);
1577 #else
1578 printf("size: %"PRIuSIZE"\n", size);
1579 printf("insns_info[%"PRIuSIZE"]: line: %d, pos: %"PRIuSIZE"\n",
1580 (size_t)0, insns_info[0].line_no, pos);
1581 #endif
1582 }
1583
1584 if (size == 0) {
1585 return NULL;
1586 }
1587 else if (size == 1) {
1588 return &insns_info[0];
1589 }
1590 else {
1591 int index;
1592 VM_ASSERT(body->insns_info.succ_index_table != NULL);
1593 index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos);
1594 return &insns_info[index-1];
1595 }
1596 }
1597
1598 static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t * iseq,size_t pos)1599 get_insn_info(const rb_iseq_t *iseq, size_t pos)
1600 {
1601 return get_insn_info_succinct_bitvector(iseq, pos);
1602 }
1603 #endif
1604
1605 #if VM_CHECK_MODE > 0 || VM_INSN_INFO_TABLE_IMPL == 0
1606 static const struct iseq_insn_info_entry *
get_insn_info_linear_search(const rb_iseq_t * iseq,size_t pos)1607 get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos)
1608 {
1609 const struct rb_iseq_constant_body *const body = iseq->body;
1610 size_t i = 0, size = body->insns_info.size;
1611 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
1612 const unsigned int *positions = body->insns_info.positions;
1613 const int debug = 0;
1614
1615 if (debug) {
1616 printf("size: %"PRIuSIZE"\n", size);
1617 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1618 i, positions[i], insns_info[i].line_no, pos);
1619 }
1620
1621 if (size == 0) {
1622 return NULL;
1623 }
1624 else if (size == 1) {
1625 return &insns_info[0];
1626 }
1627 else {
1628 for (i=1; i<size; i++) {
1629 if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1630 i, positions[i], insns_info[i].line_no, pos);
1631
1632 if (positions[i] == pos) {
1633 return &insns_info[i];
1634 }
1635 if (positions[i] > pos) {
1636 return &insns_info[i-1];
1637 }
1638 }
1639 }
1640 return &insns_info[i-1];
1641 }
1642 #endif
1643
1644 #if VM_INSN_INFO_TABLE_IMPL == 0 /* linear search */
1645 static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t * iseq,size_t pos)1646 get_insn_info(const rb_iseq_t *iseq, size_t pos)
1647 {
1648 return get_insn_info_linear_search(iseq, pos);
1649 }
1650 #endif
1651
1652 #if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
1653 static void
validate_get_insn_info(const rb_iseq_t * iseq)1654 validate_get_insn_info(const rb_iseq_t *iseq)
1655 {
1656 const struct rb_iseq_constant_body *const body = iseq->body;
1657 size_t i;
1658 for (i = 0; i < body->iseq_size; i++) {
1659 if (get_insn_info_linear_search(iseq, i) != get_insn_info(iseq, i)) {
1660 rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", i, i);
1661 }
1662 }
1663 }
1664 #endif
1665
1666 unsigned int
rb_iseq_line_no(const rb_iseq_t * iseq,size_t pos)1667 rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos)
1668 {
1669 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
1670
1671 if (entry) {
1672 return entry->line_no;
1673 }
1674 else {
1675 return 0;
1676 }
1677 }
1678
1679 MJIT_FUNC_EXPORTED rb_event_flag_t
rb_iseq_event_flags(const rb_iseq_t * iseq,size_t pos)1680 rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos)
1681 {
1682 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
1683 if (entry) {
1684 return entry->events;
1685 }
1686 else {
1687 return 0;
1688 }
1689 }
1690
1691 void
rb_iseq_clear_event_flags(const rb_iseq_t * iseq,size_t pos,rb_event_flag_t reset)1692 rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset)
1693 {
1694 struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos);
1695 if (entry) {
1696 entry->events &= ~reset;
1697 if (!(entry->events & iseq->aux.exec.global_trace_events)) {
1698 void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos);
1699 rb_iseq_trace_flag_cleared(iseq, pos);
1700 }
1701 }
1702 }
1703
1704 static VALUE
local_var_name(const rb_iseq_t * diseq,VALUE level,VALUE op)1705 local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op)
1706 {
1707 VALUE i;
1708 VALUE name;
1709 ID lid;
1710 int idx;
1711
1712 for (i = 0; i < level; i++) {
1713 diseq = diseq->body->parent_iseq;
1714 }
1715 idx = diseq->body->local_table_size - (int)op - 1;
1716 lid = diseq->body->local_table[idx];
1717 name = rb_id2str(lid);
1718 if (!name) {
1719 name = rb_str_new_cstr("?");
1720 }
1721 else if (!rb_str_symname_p(name)) {
1722 name = rb_str_inspect(name);
1723 }
1724 else {
1725 name = rb_str_dup(name);
1726 }
1727 rb_str_catf(name, "@%d", idx);
1728 return name;
1729 }
1730
1731 int rb_insn_unified_local_var_level(VALUE);
1732
/*
 * Builds the textual form of one instruction operand.
 *
 * iseq  - iseq the instruction belongs to (used for local names and for the
 *         inline storage index)
 * insn  - the instruction; insn_op_types(insn) gives one type character per
 *         operand
 * op_no - index of the operand within the instruction
 * op    - raw operand value
 * len   - instruction length, used together with pos for TS_OFFSET
 * pos   - position of the instruction
 * pnop  - pointer to the operand following this one, may be NULL; only read
 *         for TS_LINDEX when the next operand type is TS_NUM
 * child - when non-zero, iseq operands found here are pushed onto it
 *
 * Returns the resulting value (normally a String); an unknown operand type
 * is reported through rb_bug().
 */
VALUE
rb_insn_operand_intern(const rb_iseq_t *iseq,
                       VALUE insn, int op_no, VALUE op,
                       int len, size_t pos, const VALUE *pnop, VALUE child)
{
    const char *types = insn_op_types(insn);
    char type = types[op_no];
    VALUE ret = Qundef;

    switch (type) {
      case TS_OFFSET:		/* LONG */
        /* shown as pos + len + op rather than as the raw operand */
        ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op));
        break;

      case TS_NUM:		/* ULONG */
        /* first operand of `defined`: show a symbolic name where one exists */
        if (insn == BIN(defined) && op_no == 0) {
            enum defined_type deftype = (enum defined_type)op;
            if (deftype == DEFINED_FUNC) {
                ret = rb_fstring_lit("func"); break;
            }
            if (deftype == DEFINED_REF) {
                ret = rb_fstring_lit("ref"); break;
            }
            ret = rb_iseq_defined_string(deftype);
            /* a zero result falls through to the plain number below */
            if (ret) break;
        }
        /* first operand of `checktype`: show the type name when known */
        else if (insn == BIN(checktype) && op_no == 0) {
            const char *type_str = rb_type_str((enum ruby_value_type)op);
            if (type_str) {
                ret = rb_str_new_cstr(type_str); break;
            }
        }
        ret = rb_sprintf("%"PRIuVALUE, op);
        break;

      case TS_LINDEX:{
        int level;
        /* the level is the next operand when that one is a TS_NUM ... */
        if (types[op_no+1] == TS_NUM && pnop) {
            ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE);
        }
        /* ... or comes from the instruction itself when the lookup is
         * non-negative */
        else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) {
            ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE);
        }
        else {
            ret = rb_inspect(INT2FIX(op));
        }
        break;
      }
      case TS_ID:		/* ID (symbol) */
        ret = rb_inspect(ID2SYM(op));
        break;

      case TS_VALUE:		/* VALUE */
        op = obj_resurrect(op);
        if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) {
            /* should be DEFINED_REF */
            int type = NUM2INT(op);
            if (type) {
                /* the low bit picks the format; the remaining bits are
                 * printed as a character or as a number */
                if (type & 1) {
                    ret = rb_sprintf(":$%c", (type >> 1));
                }
                else {
                    ret = rb_sprintf(":$%d", (type >> 1));
                }
                break;
            }
        }
        ret = rb_inspect(op);
        /* objects of class rb_cISeq are also collected for the caller */
        if (CLASS_OF(op) == rb_cISeq) {
            if (child) {
                rb_ary_push(child, op);
            }
        }
        break;

      case TS_ISEQ:		/* iseq */
        {
            if (op) {
                /* note: this local shadows the `iseq` parameter */
                const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op);
                /* the label object itself is returned, not a copy */
                ret = iseq->body->location.label;
                if (child) {
                    rb_ary_push(child, (VALUE)iseq);
                }
            }
            else {
                ret = rb_str_new2("nil");
            }
            break;
        }
      case TS_GENTRY:
        {
            struct rb_global_entry *entry = (struct rb_global_entry *)op;
            ret = rb_str_dup(rb_id2str(entry->id));
        }
        break;

      case TS_IC:
      case TS_ISE:
        /* shown as the index of the entry within body->is_entries */
        ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - iseq->body->is_entries);
        break;

      case TS_CALLINFO:
        {
            struct rb_call_info *ci = (struct rb_call_info *)op;
            /* parts are collected here and joined with ", " at the end */
            VALUE ary = rb_ary_new();

            if (ci->mid) {
                rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(ci->mid)));
            }

            rb_ary_push(ary, rb_sprintf("argc:%d", ci->orig_argc));

            if (ci->flag & VM_CALL_KWARG) {
                /* with VM_CALL_KWARG set the call info is read as the
                 * with-kwarg variant to reach the keyword list */
                struct rb_call_info_kw_arg *kw_args = ((struct rb_call_info_with_kwarg *)ci)->kw_arg;
                VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords);
                rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(","))));
            }

            if (ci->flag) {
                VALUE flags = rb_ary_new();
                /* appends the flag name when the matching VM_CALL_ bit is set */
# define CALL_FLAG(n) if (ci->flag & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n))
                CALL_FLAG(ARGS_SPLAT);
                CALL_FLAG(ARGS_BLOCKARG);
                CALL_FLAG(FCALL);
                CALL_FLAG(VCALL);
                CALL_FLAG(ARGS_SIMPLE);
                CALL_FLAG(BLOCKISEQ);
                CALL_FLAG(TAILCALL);
                CALL_FLAG(SUPER);
                CALL_FLAG(ZSUPER);
                CALL_FLAG(KWARG);
                CALL_FLAG(KW_SPLAT);
                CALL_FLAG(OPT_SEND); /* maybe not reachable */
                rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|")));
            }
            ret = rb_sprintf("<callinfo!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", ")));
        }
        break;

      case TS_CALLCACHE:
        ret = rb_str_new2("<callcache>");
        break;

      case TS_CDHASH:
        ret = rb_str_new2("<cdhash>");
        break;

      case TS_FUNCPTR:
        {
#ifdef HAVE_DLADDR
            /* use the symbol name when dladdr() can resolve the address */
            Dl_info info;
            if (dladdr((void *)op, &info) && info.dli_sname) {
                ret = rb_str_new_cstr(info.dli_sname);
                break;
            }
#endif
            ret = rb_str_new2("<funcptr>");
        }
        break;

      default:
        rb_bug("unknown operand type: %c", type);
    }
    return ret;
}
1898
1899 static VALUE
right_strip(VALUE str)1900 right_strip(VALUE str)
1901 {
1902 const char *beg = RSTRING_PTR(str), *end = RSTRING_END(str);
1903 while (end-- > beg && *end == ' ');
1904 rb_str_set_len(str, end - beg + 1);
1905 return str;
1906 }
1907
1908 /**
 * Disassemble an instruction
1910 * Iseq -> Iseq inspect object
1911 */
1912 int
rb_iseq_disasm_insn(VALUE ret,const VALUE * code,size_t pos,const rb_iseq_t * iseq,VALUE child)1913 rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos,
1914 const rb_iseq_t *iseq, VALUE child)
1915 {
1916 VALUE insn = code[pos];
1917 int len = insn_len(insn);
1918 int j;
1919 const char *types = insn_op_types(insn);
1920 VALUE str = rb_str_new(0, 0);
1921 const char *insn_name_buff;
1922
1923 insn_name_buff = insn_name(insn);
1924 if (1) {
1925 extern const int rb_vm_max_insn_name_size;
1926 rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff);
1927 }
1928 else {
1929 rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos,
1930 (int)strcspn(insn_name_buff, "_"), insn_name_buff);
1931 }
1932
1933 for (j = 0; types[j]; j++) {
1934 VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1],
1935 len, pos, &code[pos + j + 2],
1936 child);
1937 rb_str_concat(str, opstr);
1938
1939 if (types[j + 1]) {
1940 rb_str_cat2(str, ", ");
1941 }
1942 }
1943
1944 {
1945 unsigned int line_no = rb_iseq_line_no(iseq, pos);
1946 unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1);
1947 if (line_no && line_no != prev) {
1948 long slen = RSTRING_LEN(str);
1949 slen = (slen > 70) ? 0 : (70 - slen);
1950 str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no);
1951 }
1952 }
1953
1954 {
1955 rb_event_flag_t events = rb_iseq_event_flags(iseq, pos);
1956 if (events) {
1957 str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s]",
1958 events & RUBY_EVENT_LINE ? "Li" : "",
1959 events & RUBY_EVENT_CLASS ? "Cl" : "",
1960 events & RUBY_EVENT_END ? "En" : "",
1961 events & RUBY_EVENT_CALL ? "Ca" : "",
1962 events & RUBY_EVENT_RETURN ? "Re" : "",
1963 events & RUBY_EVENT_C_CALL ? "Cc" : "",
1964 events & RUBY_EVENT_C_RETURN ? "Cr" : "",
1965 events & RUBY_EVENT_B_CALL ? "Bc" : "",
1966 events & RUBY_EVENT_B_RETURN ? "Br" : "",
1967 events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "",
1968 events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : "");
1969 }
1970 }
1971
1972 right_strip(str);
1973 if (ret) {
1974 rb_str_cat2(str, "\n");
1975 rb_str_concat(ret, str);
1976 }
1977 else {
1978 printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str));
1979 }
1980 return len;
1981 }
1982
1983 static const char *
catch_type(int type)1984 catch_type(int type)
1985 {
1986 switch (type) {
1987 case CATCH_TYPE_RESCUE:
1988 return "rescue";
1989 case CATCH_TYPE_ENSURE:
1990 return "ensure";
1991 case CATCH_TYPE_RETRY:
1992 return "retry";
1993 case CATCH_TYPE_BREAK:
1994 return "break";
1995 case CATCH_TYPE_REDO:
1996 return "redo";
1997 case CATCH_TYPE_NEXT:
1998 return "next";
1999 default:
2000 rb_bug("unknown catch type: %d", type);
2001 return 0;
2002 }
2003 }
2004
2005 static VALUE
iseq_inspect(const rb_iseq_t * iseq)2006 iseq_inspect(const rb_iseq_t *iseq)
2007 {
2008 const struct rb_iseq_constant_body *const body = iseq->body;
2009 if (!body->location.label) {
2010 return rb_sprintf("#<ISeq: uninitialized>");
2011 }
2012 else {
2013 const rb_code_location_t *loc = &body->location.code_location;
2014 return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>",
2015 body->location.label, rb_iseq_path(iseq),
2016 loc->beg_pos.lineno,
2017 loc->beg_pos.lineno,
2018 loc->beg_pos.column,
2019 loc->end_pos.lineno,
2020 loc->end_pos.column);
2021 }
2022 }
2023
2024 static VALUE
rb_iseq_disasm_recursive(const rb_iseq_t * iseq,VALUE indent)2025 rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent)
2026 {
2027 const struct rb_iseq_constant_body *const body = iseq->body;
2028 VALUE *code;
2029 VALUE str = rb_str_new(0, 0);
2030 VALUE child = rb_ary_tmp_new(3);
2031 unsigned int size;
2032 unsigned int i;
2033 long l;
2034 size_t n;
2035 enum {header_minlen = 72};
2036 st_table *done_iseq = 0;
2037 const char *indent_str;
2038 long indent_len;
2039
2040 rb_secure(1);
2041
2042 size = body->iseq_size;
2043
2044 indent_len = RSTRING_LEN(indent);
2045 indent_str = RSTRING_PTR(indent);
2046
2047 rb_str_cat(str, indent_str, indent_len);
2048 rb_str_cat2(str, "== disasm: ");
2049
2050 rb_str_append(str, iseq_inspect(iseq));
2051 rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "TRUE" : "FALSE");
2052 if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) {
2053 rb_str_modify_expand(str, header_minlen - l);
2054 memset(RSTRING_END(str), '=', header_minlen - l);
2055 }
2056 rb_str_cat2(str, "\n");
2057
2058 /* show catch table information */
2059 if (body->catch_table) {
2060 rb_str_cat(str, indent_str, indent_len);
2061 rb_str_cat2(str, "== catch table\n");
2062 }
2063 if (body->catch_table) {
2064 rb_str_cat_cstr(indent, "| ");
2065 indent_str = RSTRING_PTR(indent);
2066 for (i = 0; i < body->catch_table->size; i++) {
2067 const struct iseq_catch_table_entry *entry = &body->catch_table->entries[i];
2068 rb_str_cat(str, indent_str, indent_len);
2069 rb_str_catf(str,
2070 "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n",
2071 catch_type((int)entry->type), (int)entry->start,
2072 (int)entry->end, (int)entry->sp, (int)entry->cont);
2073 if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) {
2074 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent));
2075 if (!done_iseq) done_iseq = st_init_numtable();
2076 st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0);
2077 indent_str = RSTRING_PTR(indent);
2078 }
2079 }
2080 rb_str_resize(indent, indent_len);
2081 indent_str = RSTRING_PTR(indent);
2082 }
2083 if (body->catch_table) {
2084 rb_str_cat(str, indent_str, indent_len);
2085 rb_str_cat2(str, "|-------------------------------------"
2086 "-----------------------------------\n");
2087 }
2088
2089 /* show local table information */
2090 if (body->local_table) {
2091 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
2092 rb_str_cat(str, indent_str, indent_len);
2093 rb_str_catf(str,
2094 "local table (size: %d, argc: %d "
2095 "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n",
2096 body->local_table_size,
2097 body->param.lead_num,
2098 body->param.opt_num,
2099 body->param.flags.has_rest ? body->param.rest_start : -1,
2100 body->param.post_num,
2101 body->param.flags.has_block ? body->param.block_start : -1,
2102 body->param.flags.has_kw ? keyword->num : -1,
2103 body->param.flags.has_kw ? keyword->required_num : -1,
2104 body->param.flags.has_kwrest ? keyword->rest_start : -1);
2105
2106 for (i = body->local_table_size; i > 0;) {
2107 int li = body->local_table_size - --i - 1;
2108 long width;
2109 VALUE name = local_var_name(iseq, 0, i);
2110 char argi[0x100] = "";
2111 char opti[0x100] = "";
2112
2113 if (body->param.flags.has_opt) {
2114 int argc = body->param.lead_num;
2115 int opts = body->param.opt_num;
2116 if (li >= argc && li < argc + opts) {
2117 snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE,
2118 body->param.opt_table[li - argc]);
2119 }
2120 }
2121
2122 snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */
2123 body->param.lead_num > li ? "Arg" : "",
2124 opti,
2125 (body->param.flags.has_rest && body->param.rest_start == li) ? "Rest" : "",
2126 (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "",
2127 (body->param.flags.has_kwrest && keyword->rest_start == li) ? "Kwrest" : "",
2128 (body->param.flags.has_block && body->param.block_start == li) ? "Block" : "");
2129
2130 rb_str_cat(str, indent_str, indent_len);
2131 rb_str_catf(str, "[%2d] ", i + 1);
2132 width = RSTRING_LEN(str) + 11;
2133 rb_str_append(str, name);
2134 if (*argi) rb_str_catf(str, "<%s>", argi);
2135 if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, "");
2136 }
2137 rb_str_cat_cstr(right_strip(str), "\n");
2138 }
2139
2140 /* show each line */
2141 code = rb_iseq_original_iseq(iseq);
2142 for (n = 0; n < size;) {
2143 rb_str_cat(str, indent_str, indent_len);
2144 n += rb_iseq_disasm_insn(str, code, n, iseq, child);
2145 }
2146
2147 for (l = 0; l < RARRAY_LEN(child); l++) {
2148 VALUE isv = rb_ary_entry(child, l);
2149 if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue;
2150 rb_str_cat_cstr(str, "\n");
2151 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent));
2152 indent_str = RSTRING_PTR(indent);
2153 }
2154 if (done_iseq) st_free_table(done_iseq);
2155
2156 return str;
2157 }
2158
2159 VALUE
rb_iseq_disasm(const rb_iseq_t * iseq)2160 rb_iseq_disasm(const rb_iseq_t *iseq)
2161 {
2162 return rb_iseq_disasm_recursive(iseq, rb_str_new(0, 0));
2163 }
2164
2165 /*
2166 * call-seq:
2167 * iseq.disasm -> str
2168 * iseq.disassemble -> str
2169 *
2170 * Returns the instruction sequence as a +String+ in human readable form.
2171 *
2172 * puts RubyVM::InstructionSequence.compile('1 + 2').disasm
2173 *
2174 * Produces:
2175 *
2176 * == disasm: <RubyVM::InstructionSequence:<compiled>@<compiled>>==========
2177 * 0000 trace 1 ( 1)
2178 * 0002 putobject 1
2179 * 0004 putobject 2
2180 * 0006 opt_plus <ic:1>
2181 * 0008 leave
2182 */
2183 static VALUE
iseqw_disasm(VALUE self)2184 iseqw_disasm(VALUE self)
2185 {
2186 return rb_iseq_disasm(iseqw_check(self));
2187 }
2188
2189 static int
iseq_iterate_children(const rb_iseq_t * iseq,void (* iter_func)(const rb_iseq_t * child_iseq,void * data),void * data)2190 iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t *child_iseq, void *data), void *data)
2191 {
2192 unsigned int i;
2193 VALUE *code = rb_iseq_original_iseq(iseq);
2194 const struct rb_iseq_constant_body *const body = iseq->body;
2195 const rb_iseq_t *child;
2196 VALUE all_children = rb_obj_hide(rb_ident_hash_new());
2197
2198 if (body->catch_table) {
2199 for (i = 0; i < body->catch_table->size; i++) {
2200 const struct iseq_catch_table_entry *entry = &body->catch_table->entries[i];
2201 child = entry->iseq;
2202 if (child) {
2203 if (rb_hash_aref(all_children, (VALUE)child) == Qnil) {
2204 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2205 (*iter_func)(child, data);
2206 }
2207 }
2208 }
2209 }
2210
2211 for (i=0; i<body->iseq_size;) {
2212 VALUE insn = code[i];
2213 int len = insn_len(insn);
2214 const char *types = insn_op_types(insn);
2215 int j;
2216
2217 for (j=0; types[j]; j++) {
2218 switch (types[j]) {
2219 case TS_ISEQ:
2220 child = (const rb_iseq_t *)code[i+j+1];
2221 if (child) {
2222 if (rb_hash_aref(all_children, (VALUE)child) == Qnil) {
2223 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2224 (*iter_func)(child, data);
2225 }
2226 }
2227 break;
2228 default:
2229 break;
2230 }
2231 }
2232 i += len;
2233 }
2234
2235 return (int)RHASH_SIZE(all_children);
2236 }
2237
2238 static void
yield_each_children(const rb_iseq_t * child_iseq,void * data)2239 yield_each_children(const rb_iseq_t *child_iseq, void *data)
2240 {
2241 rb_yield(iseqw_new(child_iseq));
2242 }
2243
2244 /*
2245 * call-seq:
2246 * iseq.each_child{|child_iseq| ...} -> iseq
2247 *
 *  Iterates over all direct child instruction sequences.
 *  The iteration order is implementation- and version-defined,
 *  so callers should not rely on it.
2251 */
2252 static VALUE
iseqw_each_child(VALUE self)2253 iseqw_each_child(VALUE self)
2254 {
2255 const rb_iseq_t *iseq = iseqw_check(self);
2256 iseq_iterate_children(iseq, yield_each_children, NULL);
2257 return self;
2258 }
2259
2260 static void
push_event_info(const rb_iseq_t * iseq,rb_event_flag_t events,int line,VALUE ary)2261 push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE ary)
2262 {
2263 #define C(ev, cstr, l) if (events & ev) rb_ary_push(ary, rb_ary_new_from_args(2, l, ID2SYM(rb_intern(cstr))));
2264 C(RUBY_EVENT_CLASS, "class", rb_iseq_first_lineno(iseq));
2265 C(RUBY_EVENT_CALL, "call", rb_iseq_first_lineno(iseq));
2266 C(RUBY_EVENT_B_CALL, "b_call", rb_iseq_first_lineno(iseq));
2267 C(RUBY_EVENT_LINE, "line", INT2FIX(line));
2268 C(RUBY_EVENT_END, "end", INT2FIX(line));
2269 C(RUBY_EVENT_RETURN, "return", INT2FIX(line));
2270 C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line));
2271 #undef C
2272 }
2273
2274 /*
2275 * call-seq:
2276 * iseq.trace_points -> ary
2277 *
 *  Returns the trace points in the instruction sequence
 *  as an array of [line, event_symbol] pairs.
2280 */
2281 static VALUE
iseqw_trace_points(VALUE self)2282 iseqw_trace_points(VALUE self)
2283 {
2284 const rb_iseq_t *iseq = iseqw_check(self);
2285 const struct rb_iseq_constant_body *const body = iseq->body;
2286 unsigned int i;
2287 VALUE ary = rb_ary_new();
2288
2289 for (i=0; i<body->insns_info.size; i++) {
2290 const struct iseq_insn_info_entry *entry = &body->insns_info.body[i];
2291 if (entry->events) {
2292 push_event_info(iseq, entry->events, entry->line_no, ary);
2293 }
2294 }
2295 return ary;
2296 }
2297
2298 /*
2299 * Returns the instruction sequence containing the given proc or method.
2300 *
2301 * For example, using irb:
2302 *
2303 * # a proc
2304 * > p = proc { num = 1 + 2 }
2305 * > RubyVM::InstructionSequence.of(p)
2306 * > #=> <RubyVM::InstructionSequence:block in irb_binding@(irb)>
2307 *
2308 * # for a method
2309 * > def foo(bar); puts bar; end
2310 * > RubyVM::InstructionSequence.of(method(:foo))
2311 * > #=> <RubyVM::InstructionSequence:foo@(irb)>
2312 *
2313 * Using ::compile_file:
2314 *
2315 * # /tmp/iseq_of.rb
2316 * def hello
2317 * puts "hello, world"
2318 * end
2319 *
2320 * $a_global_proc = proc { str = 'a' + 'b' }
2321 *
2322 * # in irb
2323 * > require '/tmp/iseq_of.rb'
2324 *
2325 * # first the method hello
2326 * > RubyVM::InstructionSequence.of(method(:hello))
2327 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7cb1d0>
2328 *
2329 * # then the global proc
2330 * > RubyVM::InstructionSequence.of($a_global_proc)
2331 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7caf78>
2332 */
2333 static VALUE
iseqw_s_of(VALUE klass,VALUE body)2334 iseqw_s_of(VALUE klass, VALUE body)
2335 {
2336 const rb_iseq_t *iseq = NULL;
2337
2338 rb_secure(1);
2339
2340 if (rb_obj_is_proc(body)) {
2341 iseq = vm_proc_iseq(body);
2342
2343 if (!rb_obj_is_iseq((VALUE)iseq)) {
2344 iseq = NULL;
2345 }
2346 }
2347 else if (rb_obj_is_method(body)) {
2348 iseq = rb_method_iseq(body);
2349 }
2350 else if (rb_typeddata_is_instance_of(body, &iseqw_data_type)) {
2351 return body;
2352 }
2353
2354 return iseq ? iseqw_new(iseq) : Qnil;
2355 }
2356
2357 /*
2358 * call-seq:
2359 * InstructionSequence.disasm(body) -> str
2360 * InstructionSequence.disassemble(body) -> str
2361 *
2362 * Takes +body+, a Method or Proc object, and returns a String with the
2363 * human readable instructions for +body+.
2364 *
2365 * For a Method object:
2366 *
2367 * # /tmp/method.rb
2368 * def hello
2369 * puts "hello, world"
2370 * end
2371 *
2372 * puts RubyVM::InstructionSequence.disasm(method(:hello))
2373 *
2374 * Produces:
2375 *
2376 * == disasm: <RubyVM::InstructionSequence:hello@/tmp/method.rb>============
2377 * 0000 trace 8 ( 1)
2378 * 0002 trace 1 ( 2)
2379 * 0004 putself
2380 * 0005 putstring "hello, world"
2381 * 0007 send :puts, 1, nil, 8, <ic:0>
2382 * 0013 trace 16 ( 3)
2383 * 0015 leave ( 2)
2384 *
2385 * For a Proc:
2386 *
2387 * # /tmp/proc.rb
2388 * p = proc { num = 1 + 2 }
2389 * puts RubyVM::InstructionSequence.disasm(p)
2390 *
2391 * Produces:
2392 *
2393 * == disasm: <RubyVM::InstructionSequence:block in <main>@/tmp/proc.rb>===
2394 * == catch table
2395 * | catch type: redo st: 0000 ed: 0012 sp: 0000 cont: 0000
2396 * | catch type: next st: 0000 ed: 0012 sp: 0000 cont: 0012
2397 * |------------------------------------------------------------------------
2398 * local table (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1] s1)
2399 * [ 2] num
2400 * 0000 trace 1 ( 1)
2401 * 0002 putobject 1
2402 * 0004 putobject 2
2403 * 0006 opt_plus <ic:1>
2404 * 0008 dup
2405 * 0009 setlocal num, 0
2406 * 0012 leave
2407 *
2408 */
2409 static VALUE
iseqw_s_disasm(VALUE klass,VALUE body)2410 iseqw_s_disasm(VALUE klass, VALUE body)
2411 {
2412 VALUE iseqw = iseqw_s_of(klass, body);
2413 return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw));
2414 }
2415
/*
 * Return the name string for node type number +node+.  The case labels of
 * the switch are supplied by the included "node_name.inc" (not part of
 * this file); a value matched by none of them is reported through rb_bug().
 */
const char *
ruby_node_name(int node)
{
    switch (node) {
#include "node_name.inc"
      default:
        rb_bug("unknown node: %d", node);
        return 0;
    }
}
2426
/* Declare a static VALUE variable named sym_<name>. */
#define DECL_SYMBOL(name) \
  static VALUE sym_##name

/* Assign to sym_<name> the Symbol whose text is <name>. */
#define INIT_SYMBOL(name) \
  sym_##name = ID2SYM(rb_intern(#name))
2432
2433 static VALUE
register_label(struct st_table * table,unsigned long idx)2434 register_label(struct st_table *table, unsigned long idx)
2435 {
2436 VALUE sym = rb_str_intern(rb_sprintf("label_%lu", idx));
2437 st_insert(table, idx, sym);
2438 return sym;
2439 }
2440
2441 static VALUE
exception_type2symbol(VALUE type)2442 exception_type2symbol(VALUE type)
2443 {
2444 ID id;
2445 switch (type) {
2446 case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break;
2447 case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break;
2448 case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break;
2449 case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break;
2450 case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break;
2451 case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break;
2452 default:
2453 rb_bug("unknown exception type: %d", (int)type);
2454 }
2455 return ID2SYM(id);
2456 }
2457
2458 static int
cdhash_each(VALUE key,VALUE value,VALUE ary)2459 cdhash_each(VALUE key, VALUE value, VALUE ary)
2460 {
2461 rb_ary_push(ary, obj_resurrect(key));
2462 rb_ary_push(ary, value);
2463 return ST_CONTINUE;
2464 }
2465
/*
 * Convert +iseq+ into a nested Array in the
 * "YARVInstructionSequence/SimpleDataFormat" layout (see the element list
 * near the end of the function).
 *
 * The conversion works in several passes:
 *   1. translate the iseq type, the local table and the parameter info;
 *   2. turn every instruction into an Array [:insn_name, operand, ...],
 *      registering a label for every jump target met on the way;
 *   3. translate the catch table (also registering labels);
 *   4. rebuild the body, inserting label symbols, line numbers and event
 *      names in front of the instructions they belong to.
 *
 * Child iseqs (TS_ISEQ operands and catch table iseqs) are converted by
 * calling this function recursively.
 */
static VALUE
iseq_data_to_ary(const rb_iseq_t *iseq)
{
    unsigned int i;
    long l;
    const struct rb_iseq_constant_body *const iseq_body = iseq->body;
    const struct iseq_insn_info_entry *prev_insn_info;
    unsigned int pos;
    int last_line = 0;
    VALUE *seq, *iseq_original;

    VALUE val = rb_ary_new();
    VALUE type; /* Symbol */
    VALUE locals = rb_ary_new();
    VALUE params = rb_hash_new();
    VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */
    VALUE nbody;
    VALUE exception = rb_ary_new(); /* [[....]] */
    VALUE misc = rb_hash_new();

    static VALUE insn_syms[VM_INSTRUCTION_SIZE/2]; /* w/o-trace only */
    /* instruction position => label Symbol (filled by register_label()) */
    struct st_table *labels_table = st_init_numtable();

    DECL_SYMBOL(top);
    DECL_SYMBOL(method);
    DECL_SYMBOL(block);
    DECL_SYMBOL(class);
    DECL_SYMBOL(rescue);
    DECL_SYMBOL(ensure);
    DECL_SYMBOL(eval);
    DECL_SYMBOL(main);
    DECL_SYMBOL(plain);

    /* one-time initialization of the static symbol caches */
    if (sym_top == 0) {
        int i;
        for (i=0; i<numberof(insn_syms); i++) {
            insn_syms[i] = ID2SYM(rb_intern(insn_name(i)));
        }
        INIT_SYMBOL(top);
        INIT_SYMBOL(method);
        INIT_SYMBOL(block);
        INIT_SYMBOL(class);
        INIT_SYMBOL(rescue);
        INIT_SYMBOL(ensure);
        INIT_SYMBOL(eval);
        INIT_SYMBOL(main);
        INIT_SYMBOL(plain);
    }

    /* type */
    switch (iseq_body->type) {
      case ISEQ_TYPE_TOP:    type = sym_top;    break;
      case ISEQ_TYPE_METHOD: type = sym_method; break;
      case ISEQ_TYPE_BLOCK:  type = sym_block;  break;
      case ISEQ_TYPE_CLASS:  type = sym_class;  break;
      case ISEQ_TYPE_RESCUE: type = sym_rescue; break;
      case ISEQ_TYPE_ENSURE: type = sym_ensure; break;
      case ISEQ_TYPE_EVAL:   type = sym_eval;   break;
      case ISEQ_TYPE_MAIN:   type = sym_main;   break;
      case ISEQ_TYPE_PLAIN:  type = sym_plain;  break;
      default: rb_bug("unsupported iseq type: %d", (int)iseq_body->type);
    };

    /* locals */
    for (i=0; i<iseq_body->local_table_size; i++) {
        ID lid = iseq_body->local_table[i];
        if (lid) {
            if (rb_id2str(lid)) {
                rb_ary_push(locals, ID2SYM(lid));
            }
            else { /* hidden variable from id_internal() */
                rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1));
            }
        }
        else {
            rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest")));
        }
    }

    /* params */
    {
        const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword;
        int j;

        if (iseq_body->param.flags.has_opt) {
            /* opt_num + 1 entries of opt_table are read here */
            int len = iseq_body->param.opt_num + 1;
            VALUE arg_opt_labels = rb_ary_new2(len);

            for (j = 0; j < len; j++) {
                VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]);
                rb_ary_push(arg_opt_labels, l);
            }
            rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels);
        }

        /* commit */
        if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start));
        if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start));
        if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start));
        if (iseq_body->param.flags.has_kw) {
            VALUE keywords = rb_ary_new();
            int i, j;
            /* required keywords are pushed as bare Symbols */
            for (i=0; i<keyword->required_num; i++) {
                rb_ary_push(keywords, ID2SYM(keyword->table[i]));
            }
            /* the remaining keywords become [name] or [name, default] */
            for (j=0; i<keyword->num; i++, j++) {
                VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i]));
                if (keyword->default_values[j] != Qundef) {
                    rb_ary_push(key, keyword->default_values[j]);
                }
                rb_ary_push(keywords, key);
            }

            rb_hash_aset(params, ID2SYM(rb_intern("kwbits")),
                         INT2FIX(keyword->bits_start));
            rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords);
        }
        if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start));
        if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue);
    }

    /* body */
    iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq);

    for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) {
        VALUE insn = *seq++;
        int j, len = insn_len(insn);
        /* seq already points at the first operand, so nseq is the start of
         * the following instruction; offsets below are relative to it */
        VALUE *nseq = seq + len - 1;
        VALUE ary = rb_ary_new2(len);

        rb_ary_push(ary, insn_syms[insn%numberof(insn_syms)]);
        for (j=0; j<len-1; j++, seq++) {
            switch (insn_op_type(insn, j)) {
              case TS_OFFSET: {
                unsigned long idx = nseq - iseq_original + *seq;
                rb_ary_push(ary, register_label(labels_table, idx));
                break;
              }
              case TS_LINDEX:
              case TS_NUM:
                rb_ary_push(ary, INT2FIX(*seq));
                break;
              case TS_VALUE:
                rb_ary_push(ary, obj_resurrect(*seq));
                break;
              case TS_ISEQ:
                {
                    const rb_iseq_t *iseq = (rb_iseq_t *)*seq;
                    if (iseq) {
                        VALUE val = iseq_data_to_ary(rb_iseq_check(iseq));
                        rb_ary_push(ary, val);
                    }
                    else {
                        rb_ary_push(ary, Qnil);
                    }
                }
                break;
              case TS_GENTRY:
                {
                    struct rb_global_entry *entry = (struct rb_global_entry *)*seq;
                    rb_ary_push(ary, ID2SYM(entry->id));
                }
                break;
              case TS_IC:
              case TS_ISE:
                {
                    /* emitted as the index into is_entries */
                    union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq;
                    rb_ary_push(ary, INT2FIX(is - iseq_body->is_entries));
                }
                break;
              case TS_CALLINFO:
                {
                    struct rb_call_info *ci = (struct rb_call_info *)*seq;
                    VALUE e = rb_hash_new();
                    int orig_argc = ci->orig_argc;

                    rb_hash_aset(e, ID2SYM(rb_intern("mid")), ci->mid ? ID2SYM(ci->mid) : Qnil);
                    rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(ci->flag));

                    if (ci->flag & VM_CALL_KWARG) {
                        struct rb_call_info_with_kwarg *ci_kw = (struct rb_call_info_with_kwarg *)ci;
                        int i;
                        VALUE kw = rb_ary_new2((long)ci_kw->kw_arg->keyword_len);

                        /* the emitted orig_argc excludes the keyword arguments */
                        orig_argc -= ci_kw->kw_arg->keyword_len;
                        for (i = 0; i < ci_kw->kw_arg->keyword_len; i++) {
                            rb_ary_push(kw, ci_kw->kw_arg->keywords[i]);
                        }
                        rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw);
                    }

                    rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")),
                                 INT2FIX(orig_argc));
                    rb_ary_push(ary, e);
                }
                break;
              case TS_CALLCACHE:
                rb_ary_push(ary, Qfalse);
                break;
              case TS_ID:
                rb_ary_push(ary, ID2SYM(*seq));
                break;
              case TS_CDHASH:
                {
                    VALUE hash = *seq;
                    VALUE val = rb_ary_new();
                    int i;

                    /* flatten to [key, offset, key, offset, ...] ... */
                    rb_hash_foreach(hash, cdhash_each, val);

                    /* ... then replace every offset by its label */
                    for (i=0; i<RARRAY_LEN(val); i+=2) {
                        VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
                        unsigned long idx = nseq - iseq_original + pos;

                        rb_ary_store(val, i+1,
                                     register_label(labels_table, idx));
                    }
                    rb_ary_push(ary, val);
                }
                break;
              case TS_FUNCPTR:
                {
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE val = LONG2NUM((SIGNED_VALUE)*seq);
#else
                    VALUE val = LL2NUM((SIGNED_VALUE)*seq);
#endif
                    rb_ary_push(ary, val);
                }
                break;
              default:
                rb_bug("unknown operand: %c", insn_op_type(insn, j));
            }
        }
        rb_ary_push(body, ary);
    }

    /* keep the raw instruction list; `body' is rebuilt further down */
    nbody = body;

    /* exception */
    if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) {
        VALUE ary = rb_ary_new();
        const struct iseq_catch_table_entry *entry = &iseq_body->catch_table->entries[i];
        rb_ary_push(ary, exception_type2symbol(entry->type));
        if (entry->iseq) {
            rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq)));
        }
        else {
            rb_ary_push(ary, Qnil);
        }
        rb_ary_push(ary, register_label(labels_table, entry->start));
        rb_ary_push(ary, register_label(labels_table, entry->end));
        rb_ary_push(ary, register_label(labels_table, entry->cont));
        rb_ary_push(ary, UINT2NUM(entry->sp));
        rb_ary_push(exception, ary);
    }

    /* make body with labels and insert line number */
    body = rb_ary_new();
    prev_insn_info = NULL;

    for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) {
        const struct iseq_insn_info_entry *info;
        VALUE ary = RARRAY_AREF(nbody, l);
        st_data_t label;

        if (st_lookup(labels_table, pos, &label)) {
            rb_ary_push(body, (VALUE)label);
        }

        info = get_insn_info(iseq, pos);

        /* line number and events are emitted only when the info entry changes */
        if (prev_insn_info != info) {
            int line = info->line_no;
            rb_event_flag_t events = info->events;

            if (line > 0 && last_line != line) {
                rb_ary_push(body, INT2FIX(line));
                last_line = line;
            }
#define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev)));
            CHECK_EVENT(RUBY_EVENT_LINE);
            CHECK_EVENT(RUBY_EVENT_CLASS);
            CHECK_EVENT(RUBY_EVENT_END);
            CHECK_EVENT(RUBY_EVENT_CALL);
            CHECK_EVENT(RUBY_EVENT_RETURN);
            CHECK_EVENT(RUBY_EVENT_B_CALL);
            CHECK_EVENT(RUBY_EVENT_B_RETURN);
#undef CHECK_EVENT
            prev_insn_info = info;
        }

        rb_ary_push(body, ary);
        /* each instruction Array holds the name plus one element per
         * operand, so its length is used to advance the position */
        pos += RARRAY_LENINT(ary); /* reject too huge data */
    }
    RB_GC_GUARD(nbody);

    st_free_table(labels_table);

    rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq_body->param.size));
    rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq_body->local_table_size));
    rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max));
    rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id));
    rb_hash_aset(misc, ID2SYM(rb_intern("code_location")),
                 rb_ary_new_from_args(4,
                                      INT2FIX(iseq_body->location.code_location.beg_pos.lineno),
                                      INT2FIX(iseq_body->location.code_location.beg_pos.column),
                                      INT2FIX(iseq_body->location.code_location.end_pos.lineno),
                                      INT2FIX(iseq_body->location.code_location.end_pos.column)));

    /*
     * [:magic, :major_version, :minor_version, :format_type, :misc,
     *  :name, :path, :absolute_path, :start_lineno, :type, :locals, :args,
     *  :catch_table, :bytecode]
     */
    rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat"));
    rb_ary_push(val, INT2FIX(ISEQ_MAJOR_VERSION)); /* major */
    rb_ary_push(val, INT2FIX(ISEQ_MINOR_VERSION)); /* minor */
    rb_ary_push(val, INT2FIX(1));
    rb_ary_push(val, misc);
    rb_ary_push(val, iseq_body->location.label);
    rb_ary_push(val, rb_iseq_path(iseq));
    rb_ary_push(val, rb_iseq_realpath(iseq));
    rb_ary_push(val, iseq_body->location.first_lineno);
    rb_ary_push(val, type);
    rb_ary_push(val, locals);
    rb_ary_push(val, params);
    rb_ary_push(val, exception);
    rb_ary_push(val, body);
    return val;
}
2799
/*
 * Build the parameter list of +iseq+ as an Array of [type] or [type, name]
 * Arrays, where type is one of :req, :opt, :rest, :keyreq, :key, :keyrest
 * and :block.  Entries are emitted in this order: leading, optional, rest,
 * post, required keyword, optional keyword, keyword rest, block.
 *
 * When +is_proc+ is non-zero, leading and post parameters are reported as
 * :opt instead of :req, and a parameter for which rb_id2str() yields no
 * name gets nil as its name instead of having the name omitted.
 */
VALUE
rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc)
{
    int i, r;
    const struct rb_iseq_constant_body *const body = iseq->body;
    const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
    VALUE a, args = rb_ary_new2(body->param.size);
    ID req, opt, rest, block, key, keyrest;
/* PARAM_TYPE starts a new entry in `a' holding only the type symbol;
 * PARAM additionally appends the parameter name when it has one.  Both
 * leave the entry in `a' (PARAM also evaluates to it). */
#define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type))
#define PARAM_ID(i) body->local_table[(i)]
#define PARAM(i, type) (                      \
        PARAM_TYPE(type),                     \
        rb_id2str(PARAM_ID(i)) ?              \
        rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \
        a)

    CONST_ID(req, "req");
    CONST_ID(opt, "opt");
    /* leading parameters */
    if (is_proc) {
        for (i = 0; i < body->param.lead_num; i++) {
            PARAM_TYPE(opt);
            rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil);
            rb_ary_push(args, a);
        }
    }
    else {
        for (i = 0; i < body->param.lead_num; i++) {
            rb_ary_push(args, PARAM(i, req));
        }
    }
    /* optional parameters: i continues from lead_num */
    r = body->param.lead_num + body->param.opt_num;
    for (; i < r; i++) {
        PARAM_TYPE(opt);
        if (rb_id2str(PARAM_ID(i))) {
            rb_ary_push(a, ID2SYM(PARAM_ID(i)));
        }
        rb_ary_push(args, a);
    }
    if (body->param.flags.has_rest) {
        CONST_ID(rest, "rest");
        rb_ary_push(args, PARAM(body->param.rest_start, rest));
    }
    /* post parameters */
    r = body->param.post_start + body->param.post_num;
    if (is_proc) {
        for (i = body->param.post_start; i < r; i++) {
            PARAM_TYPE(opt);
            rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil);
            rb_ary_push(args, a);
        }
    }
    else {
        for (i = body->param.post_start; i < r; i++) {
            rb_ary_push(args, PARAM(i, req));
        }
    }
    if (body->param.flags.has_kw) {
        /* keyword->table lists the required keywords first, then the rest */
        i = 0;
        if (keyword->required_num > 0) {
            ID keyreq;
            CONST_ID(keyreq, "keyreq");
            for (; i < keyword->required_num; i++) {
                PARAM_TYPE(keyreq);
                if (rb_id2str(keyword->table[i])) {
                    rb_ary_push(a, ID2SYM(keyword->table[i]));
                }
                rb_ary_push(args, a);
            }
        }
        CONST_ID(key, "key");
        for (; i < keyword->num; i++) {
            PARAM_TYPE(key);
            if (rb_id2str(keyword->table[i])) {
                rb_ary_push(a, ID2SYM(keyword->table[i]));
            }
            rb_ary_push(args, a);
        }
    }
    if (body->param.flags.has_kwrest) {
        CONST_ID(keyrest, "keyrest");
        rb_ary_push(args, PARAM(keyword->rest_start, keyrest));
    }
    if (body->param.flags.has_block) {
        CONST_ID(block, "block");
        rb_ary_push(args, PARAM(body->param.block_start, block));
    }
    return args;
}
2887
2888 VALUE
rb_iseq_defined_string(enum defined_type type)2889 rb_iseq_defined_string(enum defined_type type)
2890 {
2891 static const char expr_names[][18] = {
2892 "nil",
2893 "instance-variable",
2894 "local-variable",
2895 "global-variable",
2896 "class variable",
2897 "constant",
2898 "method",
2899 "yield",
2900 "super",
2901 "self",
2902 "true",
2903 "false",
2904 "assignment",
2905 "expression",
2906 };
2907 const char *estr;
2908 VALUE *defs, str;
2909
2910 if ((unsigned)(type - 1) >= (unsigned)numberof(expr_names)) return 0;
2911 estr = expr_names[type - 1];
2912 if (!estr[0]) return 0;
2913 defs = GET_VM()->defined_strings;
2914 if (!defs) {
2915 defs = ruby_xcalloc(numberof(expr_names), sizeof(VALUE));
2916 GET_VM()->defined_strings = defs;
2917 }
2918 str = defs[type-1];
2919 if (!str) {
2920 str = rb_str_new_cstr(estr);
2921 OBJ_FREEZE(str);
2922 defs[type-1] = str;
2923 rb_gc_register_mark_object(str);
2924 }
2925 return str;
2926 }
2927
/* A map from encoded_insn to insn_data: decoded insn number, its len,
 * non-trace version of encoded insn, and trace version. */

/* lookup table; filled by rb_vm_encoded_insn_data_table_init() */
static st_table *encoded_insn_data;
typedef struct insn_data_struct {
    int insn;                   /* decoded instruction number */
    int insn_len;               /* instruction length as given by insn_len() */
    void *notrace_encoded_insn; /* encoded form of the non-trace instruction */
    void *trace_encoded_insn;   /* encoded form of the trace instruction */
} insn_data_t;
/* one entry per non-trace instruction */
static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2];
2939
2940 void
rb_vm_encoded_insn_data_table_init(void)2941 rb_vm_encoded_insn_data_table_init(void)
2942 {
2943 #if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
2944 const void * const *table = rb_vm_get_insns_address_table();
2945 #define INSN_CODE(insn) ((VALUE)table[insn])
2946 #else
2947 #define INSN_CODE(insn) (insn)
2948 #endif
2949 st_data_t insn;
2950 encoded_insn_data = st_init_numtable_with_size(VM_INSTRUCTION_SIZE / 2);
2951
2952 for (insn = 0; insn < VM_INSTRUCTION_SIZE/2; insn++) {
2953 st_data_t key1 = (st_data_t)INSN_CODE(insn);
2954 st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_INSTRUCTION_SIZE/2);
2955
2956 insn_data[insn].insn = (int)insn;
2957 insn_data[insn].insn_len = insn_len(insn);
2958 insn_data[insn].notrace_encoded_insn = (void *) key1;
2959 insn_data[insn].trace_encoded_insn = (void *) key2;
2960
2961 st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]);
2962 st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]);
2963 }
2964 }
2965
2966 int
rb_vm_insn_addr2insn(const void * addr)2967 rb_vm_insn_addr2insn(const void *addr)
2968 {
2969 st_data_t key = (st_data_t)addr;
2970 st_data_t val;
2971
2972 if (st_lookup(encoded_insn_data, key, &val)) {
2973 insn_data_t *e = (insn_data_t *)val;
2974 return (int)e->insn;
2975 }
2976
2977 rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr);
2978 }
2979
2980 static inline int
encoded_iseq_trace_instrument(VALUE * iseq_encoded_insn,rb_event_flag_t turnon)2981 encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon)
2982 {
2983 st_data_t key = (st_data_t)*iseq_encoded_insn;
2984 st_data_t val;
2985
2986 if (st_lookup(encoded_insn_data, key, &val)) {
2987 insn_data_t *e = (insn_data_t *)val;
2988 *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn);
2989 return e->insn_len;
2990 }
2991
2992 rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn);
2993 }
2994
2995 void
rb_iseq_trace_flag_cleared(const rb_iseq_t * iseq,size_t pos)2996 rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos)
2997 {
2998 const struct rb_iseq_constant_body *const body = iseq->body;
2999 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3000 encoded_iseq_trace_instrument(&iseq_encoded[pos], 0);
3001 }
3002
3003 static int
iseq_add_local_tracepoint(const rb_iseq_t * iseq,rb_event_flag_t turnon_events,VALUE tpval,unsigned int target_line)3004 iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3005 {
3006 unsigned int pc;
3007 int n = 0;
3008 const struct rb_iseq_constant_body *const body = iseq->body;
3009 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3010
3011 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
3012
3013 for (pc=0; pc<body->iseq_size;) {
3014 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
3015 rb_event_flag_t pc_events = entry->events;
3016 rb_event_flag_t target_events = turnon_events;
3017 unsigned int line = (int)entry->line_no;
3018
3019 if (target_line == 0 || target_line == line) {
3020 /* ok */
3021 }
3022 else {
3023 target_events &= ~RUBY_EVENT_LINE;
3024 }
3025
3026 if (pc_events & target_events) {
3027 n++;
3028 }
3029 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (target_events | iseq->aux.exec.global_trace_events));
3030 }
3031
3032 if (n > 0) {
3033 if (iseq->aux.exec.local_hooks == NULL) {
3034 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t);
3035 }
3036 rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line);
3037 }
3038
3039 return n;
3040 }
3041
3042 struct trace_set_local_events_struct {
3043 rb_event_flag_t turnon_events;
3044 VALUE tpval;
3045 unsigned int target_line;
3046 int n;
3047 };
3048
3049 static void
iseq_add_local_tracepoint_i(const rb_iseq_t * iseq,void * p)3050 iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3051 {
3052 struct trace_set_local_events_struct *data = (struct trace_set_local_events_struct *)p;
3053 data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line);
3054 iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p);
3055 }
3056
3057 int
rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t * iseq,rb_event_flag_t turnon_events,VALUE tpval,unsigned int target_line)3058 rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3059 {
3060 struct trace_set_local_events_struct data;
3061 data.turnon_events = turnon_events;
3062 data.tpval = tpval;
3063 data.target_line = target_line;
3064 data.n = 0;
3065
3066 iseq_add_local_tracepoint_i(iseq, (void *)&data);
3067 if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */
3068 return data.n;
3069 }
3070
3071 static int
iseq_remove_local_tracepoint(const rb_iseq_t * iseq,VALUE tpval)3072 iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval)
3073 {
3074 int n = 0;
3075
3076 if (iseq->aux.exec.local_hooks) {
3077 unsigned int pc;
3078 const struct rb_iseq_constant_body *const body = iseq->body;
3079 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3080 rb_event_flag_t local_events = 0;
3081
3082 rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval);
3083 local_events = iseq->aux.exec.local_hooks->events;
3084
3085 if (local_events == 0) {
3086 if (iseq->aux.exec.local_hooks->running == 0) {
3087 rb_hook_list_free(iseq->aux.exec.local_hooks);
3088 }
3089 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL;
3090 }
3091
3092 for (pc = 0; pc<body->iseq_size;) {
3093 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
3094 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events));
3095 }
3096 }
3097 return n;
3098 }
3099
3100 struct trace_clear_local_events_struct {
3101 VALUE tpval;
3102 int n;
3103 };
3104
3105 static void
iseq_remove_local_tracepoint_i(const rb_iseq_t * iseq,void * p)3106 iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3107 {
3108 struct trace_clear_local_events_struct *data = (struct trace_clear_local_events_struct *)p;
3109 data->n += iseq_remove_local_tracepoint(iseq, data->tpval);
3110 iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p);
3111 }
3112
3113 int
rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t * iseq,VALUE tpval)3114 rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval)
3115 {
3116 struct trace_clear_local_events_struct data;
3117 data.tpval = tpval;
3118 data.n = 0;
3119
3120 iseq_remove_local_tracepoint_i(iseq, (void *)&data);
3121 return data.n;
3122 }
3123
3124 void
rb_iseq_trace_set(const rb_iseq_t * iseq,rb_event_flag_t turnon_events)3125 rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
3126 {
3127 if (iseq->aux.exec.global_trace_events == turnon_events) {
3128 return;
3129 }
3130
3131 if (!ISEQ_EXECUTABLE_P(iseq)) {
3132 /* this is building ISeq */
3133 return;
3134 }
3135 else {
3136 unsigned int pc;
3137 const struct rb_iseq_constant_body *const body = iseq->body;
3138 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3139 rb_event_flag_t enabled_events;
3140 rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0;
3141 ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events;
3142 enabled_events = turnon_events | local_events;
3143
3144 for (pc=0; pc<body->iseq_size;) {
3145 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
3146 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events);
3147 }
3148 }
3149 }
3150
3151 static int
trace_set_i(void * vstart,void * vend,size_t stride,void * data)3152 trace_set_i(void *vstart, void *vend, size_t stride, void *data)
3153 {
3154 rb_event_flag_t turnon_events = *(rb_event_flag_t *)data;
3155
3156 VALUE v = (VALUE)vstart;
3157 for (; v != (VALUE)vend; v += stride) {
3158 if (rb_obj_is_iseq(v)) {
3159 rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events);
3160 }
3161 }
3162 return 0;
3163 }
3164
3165 void
rb_iseq_trace_set_all(rb_event_flag_t turnon_events)3166 rb_iseq_trace_set_all(rb_event_flag_t turnon_events)
3167 {
3168 rb_objspace_each_objects(trace_set_i, &turnon_events);
3169 }
3170
3171 /* This is exported since Ruby 2.5 but not internally used for now. If you're going to use this, please
3172 update `ruby_vm_event_enabled_global_flags` and set `mjit_call_p = FALSE` as well to cancel MJIT code. */
3173 void
rb_iseq_trace_on_all(void)3174 rb_iseq_trace_on_all(void)
3175 {
3176 rb_iseq_trace_set_all(RUBY_EVENT_TRACEPOINT_ALL);
3177 }
3178
3179 VALUE
rb_iseqw_local_variables(VALUE iseqval)3180 rb_iseqw_local_variables(VALUE iseqval)
3181 {
3182 return rb_iseq_local_variables(iseqw_check(iseqval));
3183 }
3184
3185 /*
3186 * call-seq:
3187 * iseq.to_binary(extra_data = nil) -> binary str
3188 *
3189 * Returns serialized iseq binary format data as a String object.
3190 * A corresponding iseq object is created by
3191 * RubyVM::InstructionSequence.load_from_binary() method.
3192 *
3193 * String extra_data will be saved with binary data.
3194 * You can access this data with
3195 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary).
3196 *
3197 * Note that the translated binary data is not portable.
3198 * You can not move this binary data to another machine.
3199 * You can not use the binary data which is created by another
3200 * version/another architecture of Ruby.
3201 */
3202 static VALUE
iseqw_to_binary(int argc,VALUE * argv,VALUE self)3203 iseqw_to_binary(int argc, VALUE *argv, VALUE self)
3204 {
3205 VALUE opt = !rb_check_arity(argc, 0, 1) ? Qnil : argv[0];
3206 return rb_iseq_ibf_dump(iseqw_check(self), opt);
3207 }
3208
3209 /*
3210 * call-seq:
3211 * RubyVM::InstructionSequence.load_from_binary(binary) -> iseq
3212 *
3213 * Load an iseq object from binary format String object
3214 * created by RubyVM::InstructionSequence.to_binary.
3215 *
3216 * This loader does not have a verifier, so that loading broken/modified
3217 * binary causes critical problem.
3218 *
3219 * You should not load binary data provided by others.
3220 * You should use binary data translated by yourself.
3221 */
3222 static VALUE
iseqw_s_load_from_binary(VALUE self,VALUE str)3223 iseqw_s_load_from_binary(VALUE self, VALUE str)
3224 {
3225 return iseqw_new(rb_iseq_ibf_load(str));
3226 }
3227
3228 /*
3229 * call-seq:
3230 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary) -> str
3231 *
3232 * Load extra data embed into binary format String object.
3233 */
3234 static VALUE
iseqw_s_load_from_binary_extra_data(VALUE self,VALUE str)3235 iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str)
3236 {
3237 return rb_iseq_ibf_load_extra_data(str);
3238 }
3239
3240 #if VM_INSN_INFO_TABLE_IMPL == 2
3241
/* An implementation of succinct bit-vector for insn_info table.
 *
 * A succinct bit-vector is a small and efficient data structure that provides
 * a bit-vector augmented with an index for O(1) rank operation:
 *
 *   rank(bv, n): the number of 1's within a range from index 0 to index n
 *
 * This can be used to look up the insn_info table from a PC.
 * For example, consider the following iseq and insn_info_table:
 *
 *   iseq               insn_info_table
 *   PC  insn+operand   position  lineno  event
 *    0: insn1                0:       1  [Li]
 *    2: insn2                2:       2  [Li]  <= (A)
 *    5: insn3                8:       3  [Li]  <= (B)
 *    8: insn4
 *
 * In this case, a succinct bit-vector whose indexes 0, 2 and 8 are "1" and
 * whose other indexes are "0", i.e., "101000001", is created.
 * To look up the lineno of insn2, calculate rank("101000001", 2) = 2, so
 * the line (A) is the entry in question.
 * To look up the lineno of insn4, calculate rank("101000001", 8) = 3, so
 * the line (B) is the entry in question.
 *
 * A naive implementation of succinct bit-vector works really well
 * not only for large sizes but also for small sizes.  However, it has
 * a tiny overhead for very small sizes.  So, this implementation consists
 * of two parts: one part is the "immediate" table that keeps rank results
 * as a raw table, and the other part is a normal succinct bit-vector.
 */
3272
3273 #define IMMEDIATE_TABLE_SIZE 54 /* a multiple of 9, and < 128 */
3274
3275 struct succ_index_table {
3276 uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9];
3277 struct succ_dict_block {
3278 unsigned int rank;
3279 uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */
3280 uint64_t bits[512/64];
3281 } succ_part[FLEX_ARY_LEN];
3282 };
3283
/*
 * Accessors for the packed rank fields.
 *
 * imm_block_rank_set/get: field i (0..8) of a uint64_t holding nine 7-bit
 * ranks.
 * small_block_rank_set/get: field i (1..7) of a uint64_t holding seven 9-bit
 * ranks.  Index 0 has no stored field and always reads as 0.
 *
 * The setters OR the value into the word, so the target field must still be
 * zero.  Each expansion is fully parenthesized so that a setter keeps its
 * meaning when it appears inside a larger expression.
 */
#define imm_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (7 * (i)))
#define imm_block_rank_get(v, i) (((int)((v) >> ((i) * 7))) & 0x7f)
#define small_block_rank_set(v, i, r) ((v) |= (uint64_t)(r) << (9 * ((i) - 1)))
#define small_block_rank_get(v, i) ((i) == 0 ? 0 : (((int)((v) >> (((i) - 1) * 9))) & 0x1ff))
3288
3289 static struct succ_index_table *
succ_index_table_create(int max_pos,int * data,int size)3290 succ_index_table_create(int max_pos, int *data, int size)
3291 {
3292 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
3293 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
3294 struct succ_index_table *sd = ruby_xcalloc(imm_size * sizeof(uint64_t) + succ_size * sizeof(struct succ_dict_block), 1); /* zero cleared */
3295 int i, j, k, r;
3296
3297 r = 0;
3298 for (j = 0; j < imm_size; j++) {
3299 for (i = 0; i < 9; i++) {
3300 if (r < size && data[r] == j * 9 + i) r++;
3301 imm_block_rank_set(sd->imm_part[j], i, r);
3302 }
3303 }
3304 for (k = 0; k < succ_size; k++) {
3305 struct succ_dict_block *sd_block = &sd->succ_part[k];
3306 int small_rank = 0;
3307 sd_block->rank = r;
3308 for (j = 0; j < 8; j++) {
3309 uint64_t bits = 0;
3310 if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank);
3311 for (i = 0; i < 64; i++) {
3312 if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) {
3313 bits |= ((uint64_t)1) << i;
3314 r++;
3315 }
3316 }
3317 sd_block->bits[j] = bits;
3318 small_rank += rb_popcount64(bits);
3319 }
3320 }
3321 return sd;
3322 }
3323
3324 static unsigned int *
succ_index_table_invert(int max_pos,struct succ_index_table * sd,int size)3325 succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size)
3326 {
3327 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
3328 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
3329 unsigned int *positions = ruby_xmalloc(sizeof(unsigned int) * size), *p;
3330 int i, j, k, r = -1;
3331 p = positions;
3332 for (j = 0; j < imm_size; j++) {
3333 for (i = 0; i < 9; i++) {
3334 int nr = imm_block_rank_get(sd->imm_part[j], i);
3335 if (r != nr) *p++ = j * 9 + i;
3336 r = nr;
3337 }
3338 }
3339 for (k = 0; k < succ_size; k++) {
3340 for (j = 0; j < 8; j++) {
3341 for (i = 0; i < 64; i++) {
3342 if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) {
3343 *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE;
3344 }
3345 }
3346 }
3347 }
3348 return positions;
3349 }
3350
3351 static int
succ_index_lookup(const struct succ_index_table * sd,int x)3352 succ_index_lookup(const struct succ_index_table *sd, int x)
3353 {
3354 if (x < IMMEDIATE_TABLE_SIZE) {
3355 const int i = x / 9;
3356 const int j = x % 9;
3357 return imm_block_rank_get(sd->imm_part[i], j);
3358 }
3359 else {
3360 const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512;
3361 const struct succ_dict_block *block = &sd->succ_part[block_index];
3362 const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512;
3363 const int small_block_index = block_bit_index / 64;
3364 const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index);
3365 const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64));
3366
3367 return block->rank + small_block_popcount + popcnt;
3368 }
3369 }
3370 #endif
3371
3372 /*
3373 * Document-class: RubyVM::InstructionSequence
3374 *
3375 * The InstructionSequence class represents a compiled sequence of
3376 * instructions for the Ruby Virtual Machine.
3377 *
3378 * With it, you can get a handle to the instructions that make up a method or
3379 * a proc, compile strings of Ruby code down to VM instructions, and
3380 * disassemble instruction sequences to strings for easy inspection. It is
3381 * mostly useful if you want to learn how the Ruby VM works, but it also lets
3382 * you control various settings for the Ruby iseq compiler.
3383 *
3384 * You can find the source for the VM instructions in +insns.def+ in the Ruby
3385 * source.
3386 *
3387 * The instruction sequence results will almost certainly change as Ruby
3388 * changes, so example output in this documentation may be different from what
3389 * you see.
3390 */
3391
3392 void
Init_ISeq(void)3393 Init_ISeq(void)
3394 {
3395 /* declare ::RubyVM::InstructionSequence */
3396 rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject);
3397 rb_undef_alloc_func(rb_cISeq);
3398 rb_define_method(rb_cISeq, "inspect", iseqw_inspect, 0);
3399 rb_define_method(rb_cISeq, "disasm", iseqw_disasm, 0);
3400 rb_define_method(rb_cISeq, "disassemble", iseqw_disasm, 0);
3401 rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0);
3402 rb_define_method(rb_cISeq, "eval", iseqw_eval, 0);
3403
3404 rb_define_method(rb_cISeq, "to_binary", iseqw_to_binary, -1);
3405 rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1);
3406 rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1);
3407
3408
3409 /* location APIs */
3410 rb_define_method(rb_cISeq, "path", iseqw_path, 0);
3411 rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0);
3412 rb_define_method(rb_cISeq, "label", iseqw_label, 0);
3413 rb_define_method(rb_cISeq, "base_label", iseqw_base_label, 0);
3414 rb_define_method(rb_cISeq, "first_lineno", iseqw_first_lineno, 0);
3415 rb_define_method(rb_cISeq, "trace_points", iseqw_trace_points, 0);
3416 rb_define_method(rb_cISeq, "each_child", iseqw_each_child, 0);
3417
3418 #if 0 /* TBD */
3419 rb_define_private_method(rb_cISeq, "marshal_dump", iseqw_marshal_dump, 0);
3420 rb_define_private_method(rb_cISeq, "marshal_load", iseqw_marshal_load, 1);
3421 /* disable this feature because there is no verifier. */
3422 rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1);
3423 #endif
3424 (void)iseq_s_load;
3425
3426 rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
3427 rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
3428 rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
3429 rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
3430 rb_define_singleton_method(rb_cISeq, "compile_option=", iseqw_s_compile_option_set, 1);
3431 rb_define_singleton_method(rb_cISeq, "disasm", iseqw_s_disasm, 1);
3432 rb_define_singleton_method(rb_cISeq, "disassemble", iseqw_s_disasm, 1);
3433 rb_define_singleton_method(rb_cISeq, "of", iseqw_s_of, 1);
3434
3435 rb_undef_method(CLASS_OF(rb_cISeq), "translate");
3436 rb_undef_method(CLASS_OF(rb_cISeq), "load_iseq");
3437 }
3438