Ruby 3.2.1p31 (2023-02-08 revision 31819e82c88c6f8ecfaeb162519bfa26a14b21fd)
iseq.c
1/**********************************************************************
2
3 iseq.c -
4
5 $Author$
6 created at: 2006-07-11(Tue) 09:00:03 +0900
7
8 Copyright (C) 2006 Koichi Sasada
9
10**********************************************************************/
11
12#define RUBY_VM_INSNS_INFO 1
13/* #define RUBY_MARK_FREE_DEBUG 1 */
14
15#include "ruby/internal/config.h"
16
17#ifdef HAVE_DLADDR
18# include <dlfcn.h>
19#endif
20
21#include "eval_intern.h"
22#include "gc.h"
23#include "id_table.h"
24#include "internal.h"
25#include "internal/bits.h"
26#include "internal/class.h"
27#include "internal/compile.h"
28#include "internal/error.h"
29#include "internal/file.h"
30#include "internal/hash.h"
31#include "internal/parse.h"
32#include "internal/sanitizers.h"
33#include "internal/symbol.h"
34#include "internal/thread.h"
35#include "internal/variable.h"
36#include "iseq.h"
37#include "mjit.h"
38#include "ruby/util.h"
39#include "vm_core.h"
40#include "vm_callinfo.h"
41#include "yjit.h"
42#include "ruby/ractor.h"
43#include "builtin.h"
44#include "insns.inc"
45#include "insns_info.inc"
46
47VALUE rb_cISeq;
48static VALUE iseqw_new(const rb_iseq_t *iseq);
49static const rb_iseq_t *iseqw_check(VALUE iseqw);
50
51#if VM_INSN_INFO_TABLE_IMPL == 2
52static struct succ_index_table *succ_index_table_create(int max_pos, int *data, int size);
53static unsigned int *succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size);
54static int succ_index_lookup(const struct succ_index_table *sd, int x);
55#endif
56
57#define hidden_obj_p(obj) (!SPECIAL_CONST_P(obj) && !RBASIC(obj)->klass)
58
59static inline VALUE
60obj_resurrect(VALUE obj)
61{
62 if (hidden_obj_p(obj)) {
63 switch (BUILTIN_TYPE(obj)) {
64 case T_STRING:
65 obj = rb_str_resurrect(obj);
66 break;
67 case T_ARRAY:
68 obj = rb_ary_resurrect(obj);
69 break;
70 case T_HASH:
71 obj = rb_hash_resurrect(obj);
72 break;
73 default:
74 break;
75 }
76 }
77 return obj;
78}
79
80static void
81free_arena(struct iseq_compile_data_storage *cur)
82{
83 struct iseq_compile_data_storage *next;
84
85 while (cur) {
86 next = cur->next;
87 ruby_xfree(cur);
88 cur = next;
89 }
90}
91
92static void
93compile_data_free(struct iseq_compile_data *compile_data)
94{
95 if (compile_data) {
96 free_arena(compile_data->node.storage_head);
97 free_arena(compile_data->insn.storage_head);
98 if (compile_data->ivar_cache_table) {
99 rb_id_table_free(compile_data->ivar_cache_table);
100 }
101 ruby_xfree(compile_data);
102 }
103}
104
105static void
106remove_from_constant_cache(ID id, IC ic)
107{
108 rb_vm_t *vm = GET_VM();
109 VALUE lookup_result;
110 st_data_t ic_data = (st_data_t)ic;
111
112 if (rb_id_table_lookup(vm->constant_cache, id, &lookup_result)) {
113 st_table *ics = (st_table *)lookup_result;
114 st_delete(ics, &ic_data, NULL);
115
116 if (ics->num_entries == 0) {
117 rb_id_table_delete(vm->constant_cache, id);
118 st_free_table(ics);
119 }
120 }
121}
122
123// When an ISEQ is being freed, all of its associated ICs are going to go away
124// as well. Because of this, we need to iterate over the ICs, and clear them
125// from the VM's constant cache.
126static void
127iseq_clear_ic_references(const rb_iseq_t *iseq)
128{
129 // In some cases (when there is a compilation error), we end up with
130 // ic_size greater than 0, but no allocated is_entries buffer.
131 // If there's no is_entries buffer to loop through, return early.
132 // [Bug #19173]
133 if (!ISEQ_BODY(iseq)->is_entries) {
134 return;
135 }
136
137 for (unsigned int ic_idx = 0; ic_idx < ISEQ_BODY(iseq)->ic_size; ic_idx++) {
138 IC ic = &ISEQ_IS_IC_ENTRY(ISEQ_BODY(iseq), ic_idx);
139
140 // Iterate over the IC's constant path's segments and clean any references to
141 // the ICs out of the VM's constant cache table.
142 const ID *segments = ic->segments;
143
144 // It's possible that segments is NULL if we overallocated an IC but
145 // optimizations removed the instruction using it
146 if (segments == NULL)
147 continue;
148
149 for (int i = 0; segments[i]; i++) {
150 ID id = segments[i];
151 if (id == idNULL) continue;
152 remove_from_constant_cache(id, ic);
153 }
154
155 ruby_xfree((void *)segments);
156 }
157}
158
159void
160rb_iseq_free(const rb_iseq_t *iseq)
161{
162 RUBY_FREE_ENTER("iseq");
163
164 if (iseq && ISEQ_BODY(iseq)) {
165 iseq_clear_ic_references(iseq);
166 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
167 mjit_free_iseq(iseq); /* Notify MJIT */
168#if USE_YJIT
169 rb_yjit_iseq_free(body->yjit_payload);
170#endif
171 ruby_xfree((void *)body->iseq_encoded);
172 ruby_xfree((void *)body->insns_info.body);
173 if (body->insns_info.positions) ruby_xfree((void *)body->insns_info.positions);
174#if VM_INSN_INFO_TABLE_IMPL == 2
175 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
176#endif
177 if (LIKELY(body->local_table != rb_iseq_shared_exc_local_tbl))
178 ruby_xfree((void *)body->local_table);
179 ruby_xfree((void *)body->is_entries);
180
181 if (body->call_data) {
182 ruby_xfree(body->call_data);
183 }
184 ruby_xfree((void *)body->catch_table);
185 ruby_xfree((void *)body->param.opt_table);
186 if (ISEQ_MBITS_BUFLEN(body->iseq_size) > 1 && body->mark_bits.list) {
187 ruby_xfree((void *)body->mark_bits.list);
188 }
189
190 if (body->param.keyword != NULL) {
191 ruby_xfree((void *)body->param.keyword->default_values);
192 ruby_xfree((void *)body->param.keyword);
193 }
194 compile_data_free(ISEQ_COMPILE_DATA(iseq));
195 if (body->outer_variables) rb_id_table_free(body->outer_variables);
196 ruby_xfree(body);
197 }
198
199 if (iseq && ISEQ_EXECUTABLE_P(iseq) && iseq->aux.exec.local_hooks) {
200 rb_hook_list_free(iseq->aux.exec.local_hooks);
201 }
202
203 RUBY_FREE_LEAVE("iseq");
204}
205
206typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj);
207
208static inline void
209iseq_scan_bits(unsigned int page, iseq_bits_t bits, VALUE *code, iseq_value_itr_t *func, void *data)
210{
211 unsigned int offset;
212 unsigned int page_offset = (page * ISEQ_MBITS_BITLENGTH);
213
214 while (bits) {
215 offset = ntz_intptr(bits);
216 VALUE op = code[page_offset + offset];
217 VALUE newop = func(data, op);
218 if (newop != op) {
219 code[page_offset + offset] = newop;
220 if (data) {
221 VALUE *original_iseq = (VALUE *)data;
222 original_iseq[page_offset + offset] = newop;
223 }
224 }
225 bits &= bits - 1; // Reset Lowest Set Bit (BLSR)
226 }
227}
228
229static void
230rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data)
231{
232 unsigned int size;
233 VALUE *code;
234 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
235
236 size = body->iseq_size;
237 code = body->iseq_encoded;
238
239 union iseq_inline_storage_entry *is_entries = body->is_entries;
240
241 if (body->is_entries) {
242 // Skip iterating over ivc caches
243 is_entries += body->ivc_size;
244
245 // ICVARC entries
246 for (unsigned int i = 0; i < body->icvarc_size; i++, is_entries++) {
247 ICVARC icvarc = (ICVARC)is_entries;
248 if (icvarc->entry) {
249 RUBY_ASSERT(!RB_TYPE_P(icvarc->entry->class_value, T_NONE));
250
251 VALUE nv = func(data, icvarc->entry->class_value);
252 if (icvarc->entry->class_value != nv) {
253 icvarc->entry->class_value = nv;
254 }
255 }
256 }
257
258 // ISE entries
259 for (unsigned int i = 0; i < body->ise_size; i++, is_entries++) {
260 union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)is_entries;
261 if (is->once.value) {
262 VALUE nv = func(data, is->once.value);
263 if (is->once.value != nv) {
264 is->once.value = nv;
265 }
266 }
267 }
268
269 // IC Entries
270 for (unsigned int i = 0; i < body->ic_size; i++, is_entries++) {
271 IC ic = (IC)is_entries;
272 if (ic->entry) {
273 VALUE nv = func(data, (VALUE)ic->entry);
274 if ((VALUE)ic->entry != nv) {
275 ic->entry = (void *)nv;
276 }
277 }
278 }
279 }
280
281 // Embedded VALUEs
282 if (body->mark_bits.list) {
283 if (ISEQ_MBITS_BUFLEN(size) == 1) {
284 iseq_scan_bits(0, body->mark_bits.single, code, func, data);
285 }
286 else {
287 if (body->mark_bits.list) {
288 for (unsigned int i = 0; i < ISEQ_MBITS_BUFLEN(size); i++) {
289 iseq_bits_t bits = body->mark_bits.list[i];
290 iseq_scan_bits(i, bits, code, func, data);
291 }
292 }
293 }
294 }
295}
296
297static VALUE
298update_each_insn_value(void *ctx, VALUE obj)
299{
300 return rb_gc_location(obj);
301}
302
303void
304rb_iseq_update_references(rb_iseq_t *iseq)
305{
306 if (ISEQ_BODY(iseq)) {
307 struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
308
309 body->variable.coverage = rb_gc_location(body->variable.coverage);
310 body->variable.pc2branchindex = rb_gc_location(body->variable.pc2branchindex);
311 body->variable.script_lines = rb_gc_location(body->variable.script_lines);
312 body->location.label = rb_gc_location(body->location.label);
313 body->location.base_label = rb_gc_location(body->location.base_label);
314 body->location.pathobj = rb_gc_location(body->location.pathobj);
315 if (body->local_iseq) {
316 body->local_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->local_iseq);
317 }
318 if (body->parent_iseq) {
319 body->parent_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->parent_iseq);
320 }
321 if (body->mandatory_only_iseq) {
322 body->mandatory_only_iseq = (struct rb_iseq_struct *)rb_gc_location((VALUE)body->mandatory_only_iseq);
323 }
324 if (body->call_data) {
325 for (unsigned int i=0; i<body->ci_size; i++) {
326 struct rb_call_data *cds = body->call_data;
327 if (!SPECIAL_CONST_P((VALUE)cds[i].ci)) {
328 cds[i].ci = (struct rb_callinfo *)rb_gc_location((VALUE)cds[i].ci);
329 }
330 cds[i].cc = (struct rb_callcache *)rb_gc_location((VALUE)cds[i].cc);
331 }
332 }
333 VALUE *original_iseq = ISEQ_ORIGINAL_ISEQ(iseq);
334 rb_iseq_each_value(iseq, update_each_insn_value, (void *)original_iseq);
335
336 if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) {
337 int i, j;
338
339 i = body->param.keyword->required_num;
340
341 for (j = 0; i < body->param.keyword->num; i++, j++) {
342 VALUE obj = body->param.keyword->default_values[j];
343 if (!UNDEF_P(obj)) {
344 body->param.keyword->default_values[j] = rb_gc_location(obj);
345 }
346 }
347 }
348
349 if (body->catch_table) {
350 struct iseq_catch_table *table = body->catch_table;
351 unsigned int i;
352 for (i = 0; i < table->size; i++) {
353 struct iseq_catch_table_entry *entry;
354 entry = UNALIGNED_MEMBER_PTR(table, entries[i]);
355 if (entry->iseq) {
356 entry->iseq = (rb_iseq_t *)rb_gc_location((VALUE)entry->iseq);
357 }
358 }
359 }
360#if USE_MJIT
361 mjit_update_references(iseq);
362#endif
363#if USE_YJIT
364 rb_yjit_iseq_update_references(body->yjit_payload);
365#endif
366 }
367}
368
369static VALUE
370each_insn_value(void *ctx, VALUE obj)
371{
372 rb_gc_mark_movable(obj);
373 return obj;
374}
375
376void
377rb_iseq_mark(const rb_iseq_t *iseq)
378{
379 RUBY_MARK_ENTER("iseq");
380
381 RUBY_MARK_UNLESS_NULL(iseq->wrapper);
382
383 if (ISEQ_BODY(iseq)) {
384 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
385
386 rb_iseq_each_value(iseq, each_insn_value, NULL);
387
388 rb_gc_mark_movable(body->variable.coverage);
389 rb_gc_mark_movable(body->variable.pc2branchindex);
390 rb_gc_mark_movable(body->variable.script_lines);
391 rb_gc_mark_movable(body->location.label);
392 rb_gc_mark_movable(body->location.base_label);
393 rb_gc_mark_movable(body->location.pathobj);
394 RUBY_MARK_MOVABLE_UNLESS_NULL((VALUE)body->mandatory_only_iseq);
395 RUBY_MARK_MOVABLE_UNLESS_NULL((VALUE)body->parent_iseq);
396
397 if (body->call_data) {
398 struct rb_call_data *cds = (struct rb_call_data *)body->call_data;
399 for (unsigned int i=0; i<body->ci_size; i++) {
400 const struct rb_callinfo *ci = cds[i].ci;
401 const struct rb_callcache *cc = cds[i].cc;
402
403 if (vm_ci_markable(ci)) {
404 rb_gc_mark_movable((VALUE)ci);
405 }
406
407 if (cc) {
408 VM_ASSERT((cc->flags & VM_CALLCACHE_ON_STACK) == 0);
409
410 if (vm_cc_markable(cc)) {
411 if (!vm_cc_invalidated_p(cc)) {
412 rb_gc_mark_movable((VALUE)cc);
413 }
414 else {
415 cds[i].cc = rb_vm_empty_cc();
416 }
417 }
418 }
419 }
420 }
421
422 if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) {
423 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
424 int i, j;
425
426 i = keyword->required_num;
427
428 for (j = 0; i < keyword->num; i++, j++) {
429 VALUE obj = keyword->default_values[j];
430 if (!SPECIAL_CONST_P(obj)) {
431 rb_gc_mark_movable(obj);
432 }
433 }
434 }
435
436 if (body->catch_table) {
437 const struct iseq_catch_table *table = body->catch_table;
438 unsigned int i;
439 for (i = 0; i < table->size; i++) {
440 const struct iseq_catch_table_entry *entry;
441 entry = UNALIGNED_MEMBER_PTR(table, entries[i]);
442 if (entry->iseq) {
443 rb_gc_mark_movable((VALUE)entry->iseq);
444 }
445 }
446 }
447
448#if USE_MJIT
449 mjit_mark_cc_entries(body);
450#endif
451#if USE_YJIT
452 rb_yjit_iseq_mark(body->yjit_payload);
453#endif
454 }
455
456 if (FL_TEST_RAW((VALUE)iseq, ISEQ_NOT_LOADED_YET)) {
457 rb_gc_mark(iseq->aux.loader.obj);
458 }
459 else if (FL_TEST_RAW((VALUE)iseq, ISEQ_USE_COMPILE_DATA)) {
460 const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq);
461
462 rb_iseq_mark_insn_storage(compile_data->insn.storage_head);
463
464 RUBY_MARK_UNLESS_NULL(compile_data->err_info);
465 if (RTEST(compile_data->catch_table_ary)) {
466 rb_gc_mark(compile_data->catch_table_ary);
467 }
468 VM_ASSERT(compile_data != NULL);
469 }
470 else {
471 /* executable */
472 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
473 if (iseq->aux.exec.local_hooks) {
474 rb_hook_list_mark(iseq->aux.exec.local_hooks);
475 }
476 }
477
478 RUBY_MARK_LEAVE("iseq");
479}
480
481static size_t
482param_keyword_size(const struct rb_iseq_param_keyword *pkw)
483{
484 size_t size = 0;
485
486 if (!pkw) return size;
487
488 size += sizeof(struct rb_iseq_param_keyword);
489 size += sizeof(VALUE) * (pkw->num - pkw->required_num);
490
491 return size;
492}
493
494size_t
495rb_iseq_memsize(const rb_iseq_t *iseq)
496{
497 size_t size = 0; /* struct already counted as RVALUE size */
498 const struct rb_iseq_constant_body *body = ISEQ_BODY(iseq);
499 const struct iseq_compile_data *compile_data;
500
501 /* TODO: should we count original_iseq? */
502
503 if (ISEQ_EXECUTABLE_P(iseq) && body) {
504 size += sizeof(struct rb_iseq_constant_body);
505 size += body->iseq_size * sizeof(VALUE);
506 size += body->insns_info.size * (sizeof(struct iseq_insn_info_entry) + sizeof(unsigned int));
507 size += body->local_table_size * sizeof(ID);
508 size += ISEQ_MBITS_BUFLEN(body->iseq_size) * ISEQ_MBITS_SIZE;
509 if (body->catch_table) {
510 size += iseq_catch_table_bytes(body->catch_table->size);
511 }
512 size += (body->param.opt_num + 1) * sizeof(VALUE);
513 size += param_keyword_size(body->param.keyword);
514
515 /* body->is_entries */
516 size += ISEQ_IS_SIZE(body) * sizeof(union iseq_inline_storage_entry);
517
518 if (ISEQ_BODY(iseq)->is_entries) {
519 /* IC entries constant segments */
520 for (unsigned int ic_idx = 0; ic_idx < body->ic_size; ic_idx++) {
521 IC ic = &ISEQ_IS_IC_ENTRY(body, ic_idx);
522 const ID *ids = ic->segments;
523 if (!ids) continue;
524 while (*ids++) {
525 size += sizeof(ID);
526 }
527 size += sizeof(ID); // null terminator
528 }
529 }
530
531 /* body->call_data */
532 size += body->ci_size * sizeof(struct rb_call_data);
533 // TODO: should we count imemo_callinfo?
534 }
535
536 compile_data = ISEQ_COMPILE_DATA(iseq);
537 if (compile_data) {
538 struct iseq_compile_data_storage *cur;
539
540 size += sizeof(struct iseq_compile_data);
541
542 cur = compile_data->node.storage_head;
543 while (cur) {
544 size += cur->size + offsetof(struct iseq_compile_data_storage, buff);
545 cur = cur->next;
546 }
547 }
548
549 return size;
550}
551
553rb_iseq_constant_body_alloc(void)
554{
555 struct rb_iseq_constant_body *iseq_body;
556 iseq_body = ZALLOC(struct rb_iseq_constant_body);
557 return iseq_body;
558}
559
560static rb_iseq_t *
561iseq_alloc(void)
562{
563 rb_iseq_t *iseq = iseq_imemo_alloc();
564 ISEQ_BODY(iseq) = rb_iseq_constant_body_alloc();
565 return iseq;
566}
567
568VALUE
569rb_iseq_pathobj_new(VALUE path, VALUE realpath)
570{
571 VALUE pathobj;
572 VM_ASSERT(RB_TYPE_P(path, T_STRING));
573 VM_ASSERT(NIL_P(realpath) || RB_TYPE_P(realpath, T_STRING));
574
575 if (path == realpath ||
576 (!NIL_P(realpath) && rb_str_cmp(path, realpath) == 0)) {
577 pathobj = rb_fstring(path);
578 }
579 else {
580 if (!NIL_P(realpath)) realpath = rb_fstring(realpath);
581 pathobj = rb_ary_new_from_args(2, rb_fstring(path), realpath);
582 rb_obj_freeze(pathobj);
583 }
584 return pathobj;
585}
586
587void
588rb_iseq_pathobj_set(const rb_iseq_t *iseq, VALUE path, VALUE realpath)
589{
590 RB_OBJ_WRITE(iseq, &ISEQ_BODY(iseq)->location.pathobj,
591 rb_iseq_pathobj_new(path, realpath));
592}
593
594static rb_iseq_location_t *
595iseq_location_setup(rb_iseq_t *iseq, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id)
596{
597 rb_iseq_location_t *loc = &ISEQ_BODY(iseq)->location;
598
599 rb_iseq_pathobj_set(iseq, path, realpath);
600 RB_OBJ_WRITE(iseq, &loc->label, name);
601 RB_OBJ_WRITE(iseq, &loc->base_label, name);
602 loc->first_lineno = first_lineno;
603 if (code_location) {
604 loc->node_id = node_id;
605 loc->code_location = *code_location;
606 }
607 else {
608 loc->code_location.beg_pos.lineno = 0;
609 loc->code_location.beg_pos.column = 0;
610 loc->code_location.end_pos.lineno = -1;
611 loc->code_location.end_pos.column = -1;
612 }
613
614 return loc;
615}
616
617static void
618set_relation(rb_iseq_t *iseq, const rb_iseq_t *piseq)
619{
620 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
621 const VALUE type = body->type;
622
623 /* set class nest stack */
624 if (type == ISEQ_TYPE_TOP) {
625 body->local_iseq = iseq;
626 }
627 else if (type == ISEQ_TYPE_METHOD || type == ISEQ_TYPE_CLASS) {
628 body->local_iseq = iseq;
629 }
630 else if (piseq) {
631 body->local_iseq = ISEQ_BODY(piseq)->local_iseq;
632 }
633
634 if (piseq) {
635 body->parent_iseq = piseq;
636 }
637
638 if (type == ISEQ_TYPE_MAIN) {
639 body->local_iseq = iseq;
640 }
641}
642
643static struct iseq_compile_data_storage *
644new_arena(void)
645{
646 struct iseq_compile_data_storage * new_arena =
648 ALLOC_N(char, INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE +
649 offsetof(struct iseq_compile_data_storage, buff));
650
651 new_arena->pos = 0;
652 new_arena->next = 0;
653 new_arena->size = INITIAL_ISEQ_COMPILE_DATA_STORAGE_BUFF_SIZE;
654
655 return new_arena;
656}
657
658static VALUE
659prepare_iseq_build(rb_iseq_t *iseq,
660 VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_code_location_t *code_location, const int node_id,
661 const rb_iseq_t *parent, int isolated_depth, enum rb_iseq_type type,
662 VALUE script_lines, const rb_compile_option_t *option)
663{
664 VALUE coverage = Qfalse;
665 VALUE err_info = Qnil;
666 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
667
668 if (parent && (type == ISEQ_TYPE_MAIN || type == ISEQ_TYPE_TOP))
669 err_info = Qfalse;
670
671 body->type = type;
672 set_relation(iseq, parent);
673
674 name = rb_fstring(name);
675 iseq_location_setup(iseq, name, path, realpath, first_lineno, code_location, node_id);
676 if (iseq != body->local_iseq) {
677 RB_OBJ_WRITE(iseq, &body->location.base_label, ISEQ_BODY(body->local_iseq)->location.label);
678 }
679 ISEQ_COVERAGE_SET(iseq, Qnil);
680 ISEQ_ORIGINAL_ISEQ_CLEAR(iseq);
681 body->variable.flip_count = 0;
682
683 if (NIL_P(script_lines)) {
684 RB_OBJ_WRITE(iseq, &body->variable.script_lines, Qnil);
685 }
686 else {
687 RB_OBJ_WRITE(iseq, &body->variable.script_lines, rb_ractor_make_shareable(script_lines));
688 }
689
690 ISEQ_COMPILE_DATA_ALLOC(iseq);
691 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->err_info, err_info);
692 RB_OBJ_WRITE(iseq, &ISEQ_COMPILE_DATA(iseq)->catch_table_ary, Qnil);
693
694 ISEQ_COMPILE_DATA(iseq)->node.storage_head = ISEQ_COMPILE_DATA(iseq)->node.storage_current = new_arena();
695 ISEQ_COMPILE_DATA(iseq)->insn.storage_head = ISEQ_COMPILE_DATA(iseq)->insn.storage_current = new_arena();
696 ISEQ_COMPILE_DATA(iseq)->isolated_depth = isolated_depth;
697 ISEQ_COMPILE_DATA(iseq)->option = option;
698 ISEQ_COMPILE_DATA(iseq)->ivar_cache_table = NULL;
699 ISEQ_COMPILE_DATA(iseq)->builtin_function_table = GET_VM()->builtin_function_table;
700
701 if (option->coverage_enabled) {
702 VALUE coverages = rb_get_coverages();
703 if (RTEST(coverages)) {
704 coverage = rb_hash_lookup(coverages, rb_iseq_path(iseq));
705 if (NIL_P(coverage)) coverage = Qfalse;
706 }
707 }
708 ISEQ_COVERAGE_SET(iseq, coverage);
709 if (coverage && ISEQ_BRANCH_COVERAGE(iseq))
710 ISEQ_PC2BRANCHINDEX_SET(iseq, rb_ary_hidden_new(0));
711
712 return Qtrue;
713}
714
715#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
716static void validate_get_insn_info(const rb_iseq_t *iseq);
717#endif
718
719void
720rb_iseq_insns_info_encode_positions(const rb_iseq_t *iseq)
721{
722#if VM_INSN_INFO_TABLE_IMPL == 2
723 /* create succ_index_table */
724 struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
725 int size = body->insns_info.size;
726 int max_pos = body->iseq_size;
727 int *data = (int *)body->insns_info.positions;
728 if (body->insns_info.succ_index_table) ruby_xfree(body->insns_info.succ_index_table);
729 body->insns_info.succ_index_table = succ_index_table_create(max_pos, data, size);
730#if VM_CHECK_MODE == 0
731 ruby_xfree(body->insns_info.positions);
732 body->insns_info.positions = NULL;
733#endif
734#endif
735}
736
#if VM_INSN_INFO_TABLE_IMPL == 2
/*
 * Reconstruct the positions array from the body's succ index table by
 * inverting it; returns whatever succ_index_table_invert() produces.
 */
unsigned int *
rb_iseq_insns_info_decode_positions(const struct rb_iseq_constant_body *body)
{
    struct succ_index_table *const table = body->insns_info.succ_index_table;
    const int entry_count = body->insns_info.size;
    const int max_pos = body->iseq_size;

    return succ_index_table_invert(max_pos, table, entry_count);
}
#endif
747
748void
749rb_iseq_init_trace(rb_iseq_t *iseq)
750{
751 iseq->aux.exec.global_trace_events = 0;
752 if (ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS) {
753 rb_iseq_trace_set(iseq, ruby_vm_event_enabled_global_flags & ISEQ_TRACE_EVENTS);
754 }
755}
756
757static VALUE
758finish_iseq_build(rb_iseq_t *iseq)
759{
760 struct iseq_compile_data *data = ISEQ_COMPILE_DATA(iseq);
761 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
762 VALUE err = data->err_info;
763 ISEQ_COMPILE_DATA_CLEAR(iseq);
764 compile_data_free(data);
765
766#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
767 validate_get_insn_info(iseq);
768#endif
769
770 if (RTEST(err)) {
771 VALUE path = pathobj_path(body->location.pathobj);
772 if (err == Qtrue) err = rb_exc_new_cstr(rb_eSyntaxError, "compile error");
773 rb_funcallv(err, rb_intern("set_backtrace"), 1, &path);
774 rb_exc_raise(err);
775 }
776
777 RB_DEBUG_COUNTER_INC(iseq_num);
778 RB_DEBUG_COUNTER_ADD(iseq_cd_num, ISEQ_BODY(iseq)->ci_size);
779
780 rb_iseq_init_trace(iseq);
781 return Qtrue;
782}
783
784static rb_compile_option_t COMPILE_OPTION_DEFAULT = {
785 OPT_INLINE_CONST_CACHE, /* int inline_const_cache; */
786 OPT_PEEPHOLE_OPTIMIZATION, /* int peephole_optimization; */
787 OPT_TAILCALL_OPTIMIZATION, /* int tailcall_optimization */
788 OPT_SPECIALISED_INSTRUCTION, /* int specialized_instruction; */
789 OPT_OPERANDS_UNIFICATION, /* int operands_unification; */
790 OPT_INSTRUCTIONS_UNIFICATION, /* int instructions_unification; */
791 OPT_STACK_CACHING, /* int stack_caching; */
792 OPT_FROZEN_STRING_LITERAL,
793 OPT_DEBUG_FROZEN_STRING_LITERAL,
794 TRUE, /* coverage_enabled */
795};
796
797static const rb_compile_option_t COMPILE_OPTION_FALSE = {0};
798
799static void
800set_compile_option_from_hash(rb_compile_option_t *option, VALUE opt)
801{
802#define SET_COMPILE_OPTION(o, h, mem) \
803 { VALUE flag = rb_hash_aref((h), ID2SYM(rb_intern(#mem))); \
804 if (flag == Qtrue) { (o)->mem = 1; } \
805 else if (flag == Qfalse) { (o)->mem = 0; } \
806 }
807#define SET_COMPILE_OPTION_NUM(o, h, mem) \
808 { VALUE num = rb_hash_aref(opt, ID2SYM(rb_intern(#mem))); \
809 if (!NIL_P(num)) (o)->mem = NUM2INT(num); \
810 }
811 SET_COMPILE_OPTION(option, opt, inline_const_cache);
812 SET_COMPILE_OPTION(option, opt, peephole_optimization);
813 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
814 SET_COMPILE_OPTION(option, opt, specialized_instruction);
815 SET_COMPILE_OPTION(option, opt, operands_unification);
816 SET_COMPILE_OPTION(option, opt, instructions_unification);
817 SET_COMPILE_OPTION(option, opt, stack_caching);
818 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
819 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
820 SET_COMPILE_OPTION(option, opt, coverage_enabled);
821 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
822#undef SET_COMPILE_OPTION
823#undef SET_COMPILE_OPTION_NUM
824}
825
826static void
827rb_iseq_make_compile_option(rb_compile_option_t *option, VALUE opt)
828{
829 Check_Type(opt, T_HASH);
830 set_compile_option_from_hash(option, opt);
831}
832
833static void
834make_compile_option(rb_compile_option_t *option, VALUE opt)
835{
836 if (NIL_P(opt)) {
837 *option = COMPILE_OPTION_DEFAULT;
838 }
839 else if (opt == Qfalse) {
840 *option = COMPILE_OPTION_FALSE;
841 }
842 else if (opt == Qtrue) {
843 int i;
844 for (i = 0; i < (int)(sizeof(rb_compile_option_t) / sizeof(int)); ++i)
845 ((int *)option)[i] = 1;
846 }
847 else if (RB_TYPE_P(opt, T_HASH)) {
848 *option = COMPILE_OPTION_DEFAULT;
849 set_compile_option_from_hash(option, opt);
850 }
851 else {
852 rb_raise(rb_eTypeError, "Compile option must be Hash/true/false/nil");
853 }
854}
855
856static VALUE
857make_compile_option_value(rb_compile_option_t *option)
858{
859 VALUE opt = rb_hash_new_with_size(11);
860#define SET_COMPILE_OPTION(o, h, mem) \
861 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), RBOOL((o)->mem))
862#define SET_COMPILE_OPTION_NUM(o, h, mem) \
863 rb_hash_aset((h), ID2SYM(rb_intern(#mem)), INT2NUM((o)->mem))
864 {
865 SET_COMPILE_OPTION(option, opt, inline_const_cache);
866 SET_COMPILE_OPTION(option, opt, peephole_optimization);
867 SET_COMPILE_OPTION(option, opt, tailcall_optimization);
868 SET_COMPILE_OPTION(option, opt, specialized_instruction);
869 SET_COMPILE_OPTION(option, opt, operands_unification);
870 SET_COMPILE_OPTION(option, opt, instructions_unification);
871 SET_COMPILE_OPTION(option, opt, stack_caching);
872 SET_COMPILE_OPTION(option, opt, frozen_string_literal);
873 SET_COMPILE_OPTION(option, opt, debug_frozen_string_literal);
874 SET_COMPILE_OPTION(option, opt, coverage_enabled);
875 SET_COMPILE_OPTION_NUM(option, opt, debug_level);
876 }
877#undef SET_COMPILE_OPTION
878#undef SET_COMPILE_OPTION_NUM
879 return opt;
880}
881
882rb_iseq_t *
883rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
884 const rb_iseq_t *parent, enum rb_iseq_type type)
885{
886 return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent,
887 0, type, &COMPILE_OPTION_DEFAULT);
888}
889
890static int
891ast_line_count(const rb_ast_body_t *ast)
892{
893 if (ast->script_lines == Qfalse) {
894 // this occurs when failed to parse the source code with a syntax error
895 return 0;
896 }
897 if (RB_TYPE_P(ast->script_lines, T_ARRAY)){
898 return (int)RARRAY_LEN(ast->script_lines);
899 }
900 return FIX2INT(ast->script_lines);
901}
902
903static VALUE
904iseq_setup_coverage(VALUE coverages, VALUE path, const rb_ast_body_t *ast, int line_offset)
905{
906 int line_count = line_offset + ast_line_count(ast);
907
908 if (line_count >= 0) {
909 int len = (rb_get_coverage_mode() & COVERAGE_TARGET_ONESHOT_LINES) ? 0 : line_count;
910
911 VALUE coverage = rb_default_coverage(len);
912 rb_hash_aset(coverages, path, coverage);
913
914 return coverage;
915 }
916
917 return Qnil;
918}
919
920static inline void
921iseq_new_setup_coverage(VALUE path, const rb_ast_body_t *ast, int line_offset)
922{
923 VALUE coverages = rb_get_coverages();
924
925 if (RTEST(coverages)) {
926 iseq_setup_coverage(coverages, path, ast, 0);
927 }
928}
929
930rb_iseq_t *
931rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, const rb_iseq_t *parent)
932{
933 iseq_new_setup_coverage(path, ast, 0);
934
935 return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent, 0,
936 ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT);
937}
938
939rb_iseq_t *
940rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt)
941{
942 iseq_new_setup_coverage(path, ast, 0);
943
944 return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"),
945 path, realpath, 0,
946 parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE);
947}
948
949rb_iseq_t *
950rb_iseq_new_eval(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth)
951{
952 if (rb_get_coverage_mode() & COVERAGE_TARGET_EVAL) {
953 VALUE coverages = rb_get_coverages();
954 if (RTEST(coverages) && RTEST(path) && !RTEST(rb_hash_has_key(coverages, path))) {
955 iseq_setup_coverage(coverages, path, ast, first_lineno - 1);
956 }
957 }
958
959 return rb_iseq_new_with_opt(ast, name, path, realpath, first_lineno,
960 parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT);
961}
962
963static inline rb_iseq_t *
964iseq_translate(rb_iseq_t *iseq)
965{
966 if (rb_respond_to(rb_cISeq, rb_intern("translate"))) {
967 VALUE v1 = iseqw_new(iseq);
968 VALUE v2 = rb_funcall(rb_cISeq, rb_intern("translate"), 1, v1);
969 if (v1 != v2 && CLASS_OF(v2) == rb_cISeq) {
970 iseq = (rb_iseq_t *)iseqw_check(v2);
971 }
972 }
973
974 return iseq;
975}
976
977rb_iseq_t *
978rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
979 int first_lineno, const rb_iseq_t *parent, int isolated_depth,
980 enum rb_iseq_type type, const rb_compile_option_t *option)
981{
982 const NODE *node = ast ? ast->root : 0;
983 /* TODO: argument check */
984 rb_iseq_t *iseq = iseq_alloc();
985 rb_compile_option_t new_opt;
986
987 if (option) {
988 new_opt = *option;
989 }
990 else {
991 new_opt = COMPILE_OPTION_DEFAULT;
992 }
993 if (ast && ast->compile_option) rb_iseq_make_compile_option(&new_opt, ast->compile_option);
994
995 VALUE script_lines = Qnil;
996
997 if (ast && !FIXNUM_P(ast->script_lines) && ast->script_lines) {
998 script_lines = ast->script_lines;
999 }
1000 else if (parent) {
1001 script_lines = ISEQ_BODY(parent)->variable.script_lines;
1002 }
1003
1004 prepare_iseq_build(iseq, name, path, realpath, first_lineno, node ? &node->nd_loc : NULL, node ? nd_node_id(node) : -1,
1005 parent, isolated_depth, type, script_lines, &new_opt);
1006
1007 rb_iseq_compile_node(iseq, node);
1008 finish_iseq_build(iseq);
1009
1010 return iseq_translate(iseq);
1011}
1012
1013rb_iseq_t *
1014rb_iseq_new_with_callback(
1015 const struct rb_iseq_new_with_callback_callback_func * ifunc,
1016 VALUE name, VALUE path, VALUE realpath,
1017 int first_lineno, const rb_iseq_t *parent,
1018 enum rb_iseq_type type, const rb_compile_option_t *option)
1019{
1020 /* TODO: argument check */
1021 rb_iseq_t *iseq = iseq_alloc();
1022
1023 if (!option) option = &COMPILE_OPTION_DEFAULT;
1024 prepare_iseq_build(iseq, name, path, realpath, first_lineno, NULL, -1, parent, 0, type, Qnil, option);
1025
1026 rb_iseq_compile_callback(iseq, ifunc);
1027 finish_iseq_build(iseq);
1028
1029 return iseq;
1030}
1031
1032const rb_iseq_t *
1033rb_iseq_load_iseq(VALUE fname)
1034{
1035 VALUE iseqv = rb_check_funcall(rb_cISeq, rb_intern("load_iseq"), 1, &fname);
1036
1037 if (!SPECIAL_CONST_P(iseqv) && RBASIC_CLASS(iseqv) == rb_cISeq) {
1038 return iseqw_check(iseqv);
1039 }
1040
1041 return NULL;
1042}
1043
1044#define CHECK_ARRAY(v) rb_to_array_type(v)
1045#define CHECK_HASH(v) rb_to_hash_type(v)
1046#define CHECK_STRING(v) rb_str_to_str(v)
1047#define CHECK_SYMBOL(v) rb_to_symbol_type(v)
1048static inline VALUE CHECK_INTEGER(VALUE v) {(void)NUM2LONG(v); return v;}
1049
1050static enum rb_iseq_type
1051iseq_type_from_sym(VALUE type)
1052{
1053 const ID id_top = rb_intern("top");
1054 const ID id_method = rb_intern("method");
1055 const ID id_block = rb_intern("block");
1056 const ID id_class = rb_intern("class");
1057 const ID id_rescue = rb_intern("rescue");
1058 const ID id_ensure = rb_intern("ensure");
1059 const ID id_eval = rb_intern("eval");
1060 const ID id_main = rb_intern("main");
1061 const ID id_plain = rb_intern("plain");
1062 /* ensure all symbols are static or pinned down before
1063 * conversion */
1064 const ID typeid = rb_check_id(&type);
1065 if (typeid == id_top) return ISEQ_TYPE_TOP;
1066 if (typeid == id_method) return ISEQ_TYPE_METHOD;
1067 if (typeid == id_block) return ISEQ_TYPE_BLOCK;
1068 if (typeid == id_class) return ISEQ_TYPE_CLASS;
1069 if (typeid == id_rescue) return ISEQ_TYPE_RESCUE;
1070 if (typeid == id_ensure) return ISEQ_TYPE_ENSURE;
1071 if (typeid == id_eval) return ISEQ_TYPE_EVAL;
1072 if (typeid == id_main) return ISEQ_TYPE_MAIN;
1073 if (typeid == id_plain) return ISEQ_TYPE_PLAIN;
1074 return (enum rb_iseq_type)-1;
1075}
1076
1077static VALUE
1078iseq_load(VALUE data, const rb_iseq_t *parent, VALUE opt)
1079{
1080 rb_iseq_t *iseq = iseq_alloc();
1081
1082 VALUE magic, version1, version2, format_type, misc;
1083 VALUE name, path, realpath, code_location, node_id;
1084 VALUE type, body, locals, params, exception;
1085
1086 st_data_t iseq_type;
1087 rb_compile_option_t option;
1088 int i = 0;
1089 rb_code_location_t tmp_loc = { {0, 0}, {-1, -1} };
1090
1091 /* [magic, major_version, minor_version, format_type, misc,
1092 * label, path, first_lineno,
1093 * type, locals, args, exception_table, body]
1094 */
1095
1096 data = CHECK_ARRAY(data);
1097
1098 magic = CHECK_STRING(rb_ary_entry(data, i++));
1099 version1 = CHECK_INTEGER(rb_ary_entry(data, i++));
1100 version2 = CHECK_INTEGER(rb_ary_entry(data, i++));
1101 format_type = CHECK_INTEGER(rb_ary_entry(data, i++));
1102 misc = CHECK_HASH(rb_ary_entry(data, i++));
1103 ((void)magic, (void)version1, (void)version2, (void)format_type);
1104
1105 name = CHECK_STRING(rb_ary_entry(data, i++));
1106 path = CHECK_STRING(rb_ary_entry(data, i++));
1107 realpath = rb_ary_entry(data, i++);
1108 realpath = NIL_P(realpath) ? Qnil : CHECK_STRING(realpath);
1109 int first_lineno = RB_NUM2INT(rb_ary_entry(data, i++));
1110
1111 type = CHECK_SYMBOL(rb_ary_entry(data, i++));
1112 locals = CHECK_ARRAY(rb_ary_entry(data, i++));
1113 params = CHECK_HASH(rb_ary_entry(data, i++));
1114 exception = CHECK_ARRAY(rb_ary_entry(data, i++));
1115 body = CHECK_ARRAY(rb_ary_entry(data, i++));
1116
1117 ISEQ_BODY(iseq)->local_iseq = iseq;
1118
1119 iseq_type = iseq_type_from_sym(type);
1120 if (iseq_type == (enum rb_iseq_type)-1) {
1121 rb_raise(rb_eTypeError, "unsupported type: :%"PRIsVALUE, rb_sym2str(type));
1122 }
1123
1124 node_id = rb_hash_aref(misc, ID2SYM(rb_intern("node_id")));
1125
1126 code_location = rb_hash_aref(misc, ID2SYM(rb_intern("code_location")));
1127 if (RB_TYPE_P(code_location, T_ARRAY) && RARRAY_LEN(code_location) == 4) {
1128 tmp_loc.beg_pos.lineno = NUM2INT(rb_ary_entry(code_location, 0));
1129 tmp_loc.beg_pos.column = NUM2INT(rb_ary_entry(code_location, 1));
1130 tmp_loc.end_pos.lineno = NUM2INT(rb_ary_entry(code_location, 2));
1131 tmp_loc.end_pos.column = NUM2INT(rb_ary_entry(code_location, 3));
1132 }
1133
1134 make_compile_option(&option, opt);
1135 option.peephole_optimization = FALSE; /* because peephole optimization can modify original iseq */
1136 prepare_iseq_build(iseq, name, path, realpath, first_lineno, &tmp_loc, NUM2INT(node_id),
1137 parent, 0, (enum rb_iseq_type)iseq_type, Qnil, &option);
1138
1139 rb_iseq_build_from_ary(iseq, misc, locals, params, exception, body);
1140
1141 finish_iseq_build(iseq);
1142
1143 return iseqw_new(iseq);
1144}
1145
1146/*
1147 * :nodoc:
1148 */
1149static VALUE
1150iseq_s_load(int argc, VALUE *argv, VALUE self)
1151{
1152 VALUE data, opt=Qnil;
1153 rb_scan_args(argc, argv, "11", &data, &opt);
1154 return iseq_load(data, NULL, opt);
1155}
1156
1157VALUE
1158rb_iseq_load(VALUE data, VALUE parent, VALUE opt)
1159{
1160 return iseq_load(data, RTEST(parent) ? (rb_iseq_t *)parent : NULL, opt);
1161}
1162
1163static rb_iseq_t *
1164rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, VALUE opt)
1165{
1166 rb_iseq_t *iseq = NULL;
1167 rb_compile_option_t option;
1168#if !defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 8)
1169# define INITIALIZED volatile /* suppress warnings by gcc 4.8 */
1170#else
1171# define INITIALIZED /* volatile */
1172#endif
1173 rb_ast_t *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
1174 int ln;
1175 rb_ast_t *INITIALIZED ast;
1176 VALUE name = rb_fstring_lit("<compiled>");
1177
1178 /* safe results first */
1179 make_compile_option(&option, opt);
1180 ln = NUM2INT(line);
1181 StringValueCStr(file);
1182 if (RB_TYPE_P(src, T_FILE)) {
1183 parse = rb_parser_compile_file_path;
1184 }
1185 else {
1186 parse = rb_parser_compile_string_path;
1187 StringValue(src);
1188 }
1189 {
1190 const VALUE parser = rb_parser_new();
1191 const rb_iseq_t *outer_scope = rb_iseq_new(NULL, name, name, Qnil, 0, ISEQ_TYPE_TOP);
1192 VALUE outer_scope_v = (VALUE)outer_scope;
1193 rb_parser_set_context(parser, outer_scope, FALSE);
1194 RB_GC_GUARD(outer_scope_v);
1195 ast = (*parse)(parser, file, src, ln);
1196 }
1197
1198 if (!ast->body.root) {
1199 rb_ast_dispose(ast);
1200 rb_exc_raise(GET_EC()->errinfo);
1201 }
1202 else {
1203 iseq = rb_iseq_new_with_opt(&ast->body, name, file, realpath, ln,
1204 NULL, 0, ISEQ_TYPE_TOP, &option);
1205 rb_ast_dispose(ast);
1206 }
1207
1208 return iseq;
1209}
1210
1211VALUE
1212rb_iseq_path(const rb_iseq_t *iseq)
1213{
1214 return pathobj_path(ISEQ_BODY(iseq)->location.pathobj);
1215}
1216
1217VALUE
1218rb_iseq_realpath(const rb_iseq_t *iseq)
1219{
1220 return pathobj_realpath(ISEQ_BODY(iseq)->location.pathobj);
1221}
1222
1223VALUE
1224rb_iseq_absolute_path(const rb_iseq_t *iseq)
1225{
1226 return rb_iseq_realpath(iseq);
1227}
1228
1229int
1230rb_iseq_from_eval_p(const rb_iseq_t *iseq)
1231{
1232 return NIL_P(rb_iseq_realpath(iseq));
1233}
1234
1235VALUE
1236rb_iseq_label(const rb_iseq_t *iseq)
1237{
1238 return ISEQ_BODY(iseq)->location.label;
1239}
1240
1241VALUE
1242rb_iseq_base_label(const rb_iseq_t *iseq)
1243{
1244 return ISEQ_BODY(iseq)->location.base_label;
1245}
1246
1247VALUE
1248rb_iseq_first_lineno(const rb_iseq_t *iseq)
1249{
1250 return RB_INT2NUM(ISEQ_BODY(iseq)->location.first_lineno);
1251}
1252
1253VALUE
1254rb_iseq_method_name(const rb_iseq_t *iseq)
1255{
1256 struct rb_iseq_constant_body *const body = ISEQ_BODY(ISEQ_BODY(iseq)->local_iseq);
1257
1258 if (body->type == ISEQ_TYPE_METHOD) {
1259 return body->location.base_label;
1260 }
1261 else {
1262 return Qnil;
1263 }
1264}
1265
1266void
1267rb_iseq_code_location(const rb_iseq_t *iseq, int *beg_pos_lineno, int *beg_pos_column, int *end_pos_lineno, int *end_pos_column)
1268{
1269 const rb_code_location_t *loc = &ISEQ_BODY(iseq)->location.code_location;
1270 if (beg_pos_lineno) *beg_pos_lineno = loc->beg_pos.lineno;
1271 if (beg_pos_column) *beg_pos_column = loc->beg_pos.column;
1272 if (end_pos_lineno) *end_pos_lineno = loc->end_pos.lineno;
1273 if (end_pos_column) *end_pos_column = loc->end_pos.column;
1274}
1275
1276static ID iseq_type_id(enum rb_iseq_type type);
1277
1278VALUE
1279rb_iseq_type(const rb_iseq_t *iseq)
1280{
1281 return ID2SYM(iseq_type_id(ISEQ_BODY(iseq)->type));
1282}
1283
1284VALUE
1285rb_iseq_coverage(const rb_iseq_t *iseq)
1286{
1287 return ISEQ_COVERAGE(iseq);
1288}
1289
1290static int
1291remove_coverage_i(void *vstart, void *vend, size_t stride, void *data)
1292{
1293 VALUE v = (VALUE)vstart;
1294 for (; v != (VALUE)vend; v += stride) {
1295 void *ptr = asan_poisoned_object_p(v);
1296 asan_unpoison_object(v, false);
1297
1298 if (rb_obj_is_iseq(v)) {
1299 rb_iseq_t *iseq = (rb_iseq_t *)v;
1300 ISEQ_COVERAGE_SET(iseq, Qnil);
1301 }
1302
1303 asan_poison_object_if(ptr, v);
1304 }
1305 return 0;
1306}
1307
1308void
1309rb_iseq_remove_coverage_all(void)
1310{
1311 rb_objspace_each_objects(remove_coverage_i, NULL);
1312}
1313
1314/* define wrapper class methods (RubyVM::InstructionSequence) */
1315
1316static void
1317iseqw_mark(void *ptr)
1318{
1319 rb_gc_mark((VALUE)ptr);
1320}
1321
1322static size_t
1323iseqw_memsize(const void *ptr)
1324{
1325 return rb_iseq_memsize((const rb_iseq_t *)ptr);
1326}
1327
1328static const rb_data_type_t iseqw_data_type = {
1329 "T_IMEMO/iseq",
1330 {iseqw_mark, NULL, iseqw_memsize,},
1331 0, 0, RUBY_TYPED_FREE_IMMEDIATELY|RUBY_TYPED_WB_PROTECTED
1332};
1333
1334static VALUE
1335iseqw_new(const rb_iseq_t *iseq)
1336{
1337 if (iseq->wrapper) {
1338 return iseq->wrapper;
1339 }
1340 else {
1341 union { const rb_iseq_t *in; void *out; } deconst;
1342 VALUE obj;
1343 deconst.in = iseq;
1344 obj = TypedData_Wrap_Struct(rb_cISeq, &iseqw_data_type, deconst.out);
1345 RB_OBJ_WRITTEN(obj, Qundef, iseq);
1346
1347 /* cache a wrapper object */
1348 RB_OBJ_WRITE((VALUE)iseq, &iseq->wrapper, obj);
1349 RB_OBJ_FREEZE((VALUE)iseq);
1350
1351 return obj;
1352 }
1353}
1354
1355VALUE
1356rb_iseqw_new(const rb_iseq_t *iseq)
1357{
1358 return iseqw_new(iseq);
1359}
1360
1361/*
1362 * call-seq:
1363 * InstructionSequence.compile(source[, file[, path[, line[, options]]]]) -> iseq
1364 * InstructionSequence.new(source[, file[, path[, line[, options]]]]) -> iseq
1365 *
1366 * Takes +source+, a String of Ruby code and compiles it to an
1367 * InstructionSequence.
1368 *
1369 * Optionally takes +file+, +path+, and +line+ which describe the file path,
1370 * real path and first line number of the ruby code in +source+ which are
1371 * metadata attached to the returned +iseq+.
1372 *
1373 * +file+ is used for `__FILE__` and exception backtrace. +path+ is used for
1374 * +require_relative+ base. It is recommended these should be the same full
1375 * path.
1376 *
1377 * +options+, which can be +true+, +false+ or a +Hash+, is used to
1378 * modify the default behavior of the Ruby iseq compiler.
1379 *
1380 * For details regarding valid compile options see ::compile_option=.
1381 *
1382 * RubyVM::InstructionSequence.compile("a = 1 + 2")
1383 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1384 *
1385 * path = "test.rb"
1386 * RubyVM::InstructionSequence.compile(File.read(path), path, File.expand_path(path))
1387 * #=> <RubyVM::InstructionSequence:<compiled>@test.rb:1>
1388 *
1389 * path = File.expand_path("test.rb")
1390 * RubyVM::InstructionSequence.compile(File.read(path), path, path)
1391 * #=> <RubyVM::InstructionSequence:<compiled>@/absolute/path/to/test.rb:1>
1392 *
1393 */
1394static VALUE
1395iseqw_s_compile(int argc, VALUE *argv, VALUE self)
1396{
1397 VALUE src, file = Qnil, path = Qnil, line = INT2FIX(1), opt = Qnil;
1398 int i;
1399
1400 i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
1401 if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
1402 switch (i) {
1403 case 5: opt = argv[--i];
1404 case 4: line = argv[--i];
1405 case 3: path = argv[--i];
1406 case 2: file = argv[--i];
1407 }
1408
1409 if (NIL_P(file)) file = rb_fstring_lit("<compiled>");
1410 if (NIL_P(path)) path = file;
1411 if (NIL_P(line)) line = INT2FIX(1);
1412
1413 Check_Type(path, T_STRING);
1414 Check_Type(file, T_STRING);
1415
1416 return iseqw_new(rb_iseq_compile_with_option(src, file, path, line, opt));
1417}
1418
1419/*
1420 * call-seq:
1421 * InstructionSequence.compile_file(file[, options]) -> iseq
1422 *
1423 * Takes +file+, a String with the location of a Ruby source file, reads,
1424 * parses and compiles the file, and returns +iseq+, the compiled
1425 * InstructionSequence with source location metadata set.
1426 *
1427 * Optionally takes +options+, which can be +true+, +false+ or a +Hash+, to
1428 * modify the default behavior of the Ruby iseq compiler.
1429 *
1430 * For details regarding valid compile options see ::compile_option=.
1431 *
1432 * # /tmp/hello.rb
1433 * puts "Hello, world!"
1434 *
1435 * # elsewhere
1436 * RubyVM::InstructionSequence.compile_file("/tmp/hello.rb")
1437 * #=> <RubyVM::InstructionSequence:<main>@/tmp/hello.rb>
1438 */
1439static VALUE
1440iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
1441{
1442 VALUE file, opt = Qnil;
1443 VALUE parser, f, exc = Qnil, ret;
1444 rb_ast_t *ast;
1445 rb_compile_option_t option;
1446 int i;
1447
1448 i = rb_scan_args(argc, argv, "1*:", &file, NULL, &opt);
1449 if (i > 1+NIL_P(opt)) rb_error_arity(argc, 1, 2);
1450 switch (i) {
1451 case 2: opt = argv[--i];
1452 }
1453 FilePathValue(file);
1454 file = rb_fstring(file); /* rb_io_t->pathv gets frozen anyways */
1455
1456 f = rb_file_open_str(file, "r");
1457
1458 rb_execution_context_t *ec = GET_EC();
1459 VALUE v = rb_vm_push_frame_fname(ec, file);
1460
1461 parser = rb_parser_new();
1462 rb_parser_set_context(parser, NULL, FALSE);
1463 ast = (rb_ast_t *)rb_parser_load_file(parser, file);
1464 if (!ast->body.root) exc = GET_EC()->errinfo;
1465
1466 rb_io_close(f);
1467 if (!ast->body.root) {
1468 rb_ast_dispose(ast);
1469 rb_exc_raise(exc);
1470 }
1471
1472 make_compile_option(&option, opt);
1473
1474 ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"),
1475 file,
1476 rb_realpath_internal(Qnil, file, 1),
1477 1, NULL, 0, ISEQ_TYPE_TOP, &option));
1478 rb_ast_dispose(ast);
1479
1480 rb_vm_pop_frame(ec);
1481 RB_GC_GUARD(v);
1482 return ret;
1483}
1484
1485/*
1486 * call-seq:
1487 * InstructionSequence.compile_option = options
1488 *
1489 * Sets the default values for various optimizations in the Ruby iseq
1490 * compiler.
1491 *
1492 * Possible values for +options+ include +true+, which enables all options,
1493 * +false+ which disables all options, and +nil+ which leaves all options
1494 * unchanged.
1495 *
1496 * You can also pass a +Hash+ of +options+ that you want to change, any
1497 * options not present in the hash will be left unchanged.
1498 *
1499 * Possible option names (which are keys in +options+) which can be set to
1500 * +true+ or +false+ include:
1501 *
1502 * * +:inline_const_cache+
1503 * * +:instructions_unification+
1504 * * +:operands_unification+
1505 * * +:peephole_optimization+
1506 * * +:specialized_instruction+
1507 * * +:stack_caching+
1508 * * +:tailcall_optimization+
1509 *
1510 * Additionally, +:debug_level+ can be set to an integer.
1511 *
1512 * These default options can be overwritten for a single run of the iseq
1513 * compiler by passing any of the above values as the +options+ parameter to
1514 * ::new, ::compile and ::compile_file.
1515 */
1516static VALUE
1517iseqw_s_compile_option_set(VALUE self, VALUE opt)
1518{
1519 rb_compile_option_t option;
1520 make_compile_option(&option, opt);
1521 COMPILE_OPTION_DEFAULT = option;
1522 return opt;
1523}
1524
1525/*
1526 * call-seq:
1527 * InstructionSequence.compile_option -> options
1528 *
1529 * Returns a hash of default options used by the Ruby iseq compiler.
1530 *
1531 * For details, see InstructionSequence.compile_option=.
1532 */
1533static VALUE
1534iseqw_s_compile_option_get(VALUE self)
1535{
1536 return make_compile_option_value(&COMPILE_OPTION_DEFAULT);
1537}
1538
1539static const rb_iseq_t *
1540iseqw_check(VALUE iseqw)
1541{
1542 rb_iseq_t *iseq = DATA_PTR(iseqw);
1543
1544 if (!ISEQ_BODY(iseq)) {
1545 rb_ibf_load_iseq_complete(iseq);
1546 }
1547
1548 if (!ISEQ_BODY(iseq)->location.label) {
1549 rb_raise(rb_eTypeError, "uninitialized InstructionSequence");
1550 }
1551 return iseq;
1552}
1553
1554const rb_iseq_t *
1555rb_iseqw_to_iseq(VALUE iseqw)
1556{
1557 return iseqw_check(iseqw);
1558}
1559
1560/*
1561 * call-seq:
1562 * iseq.eval -> obj
1563 *
1564 * Evaluates the instruction sequence and returns the result.
1565 *
1566 * RubyVM::InstructionSequence.compile("1 + 2").eval #=> 3
1567 */
1568static VALUE
1569iseqw_eval(VALUE self)
1570{
1571 return rb_iseq_eval(iseqw_check(self));
1572}
1573
1574/*
1575 * Returns a human-readable string representation of this instruction
1576 * sequence, including the #label and #path.
1577 */
1578static VALUE
1579iseqw_inspect(VALUE self)
1580{
1581 const rb_iseq_t *iseq = iseqw_check(self);
1582 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
1583 VALUE klass = rb_class_name(rb_obj_class(self));
1584
1585 if (!body->location.label) {
1586 return rb_sprintf("#<%"PRIsVALUE": uninitialized>", klass);
1587 }
1588 else {
1589 return rb_sprintf("<%"PRIsVALUE":%"PRIsVALUE"@%"PRIsVALUE":%d>",
1590 klass,
1591 body->location.label, rb_iseq_path(iseq),
1592 FIX2INT(rb_iseq_first_lineno(iseq)));
1593 }
1594}
1595
1596/*
1597 * Returns the path of this instruction sequence.
1598 *
1599 * <code><compiled></code> if the iseq was evaluated from a string.
1600 *
1601 * For example, using irb:
1602 *
1603 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1604 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1605 * iseq.path
1606 * #=> "<compiled>"
1607 *
1608 * Using ::compile_file:
1609 *
1610 * # /tmp/method.rb
1611 * def hello
1612 * puts "hello, world"
1613 * end
1614 *
1615 * # in irb
1616 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1617 * > iseq.path #=> /tmp/method.rb
1618 */
1619static VALUE
1620iseqw_path(VALUE self)
1621{
1622 return rb_iseq_path(iseqw_check(self));
1623}
1624
1625/*
1626 * Returns the absolute path of this instruction sequence.
1627 *
1628 * +nil+ if the iseq was evaluated from a string.
1629 *
1630 * For example, using ::compile_file:
1631 *
1632 * # /tmp/method.rb
1633 * def hello
1634 * puts "hello, world"
1635 * end
1636 *
1637 * # in irb
1638 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1639 * > iseq.absolute_path #=> /tmp/method.rb
1640 */
1641static VALUE
1642iseqw_absolute_path(VALUE self)
1643{
1644 return rb_iseq_realpath(iseqw_check(self));
1645}
1646
1647/* Returns the label of this instruction sequence.
1648 *
1649 * <code><main></code> if it's at the top level, <code><compiled></code> if it
1650 * was evaluated from a string.
1651 *
1652 * For example, using irb:
1653 *
1654 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1655 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1656 * iseq.label
1657 * #=> "<compiled>"
1658 *
1659 * Using ::compile_file:
1660 *
1661 * # /tmp/method.rb
1662 * def hello
1663 * puts "hello, world"
1664 * end
1665 *
1666 * # in irb
1667 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1668 * > iseq.label #=> <main>
1669 */
1670static VALUE
1671iseqw_label(VALUE self)
1672{
1673 return rb_iseq_label(iseqw_check(self));
1674}
1675
1676/* Returns the base label of this instruction sequence.
1677 *
1678 * For example, using irb:
1679 *
1680 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1681 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1682 * iseq.base_label
1683 * #=> "<compiled>"
1684 *
1685 * Using ::compile_file:
1686 *
1687 * # /tmp/method.rb
1688 * def hello
1689 * puts "hello, world"
1690 * end
1691 *
1692 * # in irb
1693 * > iseq = RubyVM::InstructionSequence.compile_file('/tmp/method.rb')
1694 * > iseq.base_label #=> <main>
1695 */
1696static VALUE
1697iseqw_base_label(VALUE self)
1698{
1699 return rb_iseq_base_label(iseqw_check(self));
1700}
1701
1702/* Returns the number of the first source line where the instruction sequence
1703 * was loaded from.
1704 *
1705 * For example, using irb:
1706 *
1707 * iseq = RubyVM::InstructionSequence.compile('num = 1 + 2')
1708 * #=> <RubyVM::InstructionSequence:<compiled>@<compiled>>
1709 * iseq.first_lineno
1710 * #=> 1
1711 */
1712static VALUE
1713iseqw_first_lineno(VALUE self)
1714{
1715 return rb_iseq_first_lineno(iseqw_check(self));
1716}
1717
1718static VALUE iseq_data_to_ary(const rb_iseq_t *iseq);
1719
1720/*
1721 * call-seq:
1722 * iseq.to_a -> ary
1723 *
1724 * Returns an Array with 14 elements representing the instruction sequence
1725 * with the following data:
1726 *
1727 * [magic]
1728 * A string identifying the data format. <b>Always
1729 * +YARVInstructionSequence/SimpleDataFormat+.</b>
1730 *
1731 * [major_version]
1732 * The major version of the instruction sequence.
1733 *
1734 * [minor_version]
1735 * The minor version of the instruction sequence.
1736 *
1737 * [format_type]
1738 * A number identifying the data format. <b>Always 1</b>.
1739 *
1740 * [misc]
1741 * A hash containing:
1742 *
1743 * [+:arg_size+]
1744 * the total number of arguments taken by the method or the block (0 if
1745 * _iseq_ doesn't represent a method or block)
1746 * [+:local_size+]
1747 * the number of local variables + 1
1748 * [+:stack_max+]
1749 * used in calculating the stack depth at which a SystemStackError is
1750 * thrown.
1751 *
1752 * [#label]
1753 * The name of the context (block, method, class, module, etc.) that this
1754 * instruction sequence belongs to.
1755 *
1756 * <code><main></code> if it's at the top level, <code><compiled></code> if
1757 * it was evaluated from a string.
1758 *
1759 * [#path]
1760 * The relative path to the Ruby file where the instruction sequence was
1761 * loaded from.
1762 *
1763 * <code><compiled></code> if the iseq was evaluated from a string.
1764 *
1765 * [#absolute_path]
1766 * The absolute path to the Ruby file where the instruction sequence was
1767 * loaded from.
1768 *
1769 * +nil+ if the iseq was evaluated from a string.
1770 *
1771 * [#first_lineno]
1772 * The number of the first source line where the instruction sequence was
1773 * loaded from.
1774 *
1775 * [type]
1776 * The type of the instruction sequence.
1777 *
1778 * Valid values are +:top+, +:method+, +:block+, +:class+, +:rescue+,
1779 * +:ensure+, +:eval+, +:main+, and +plain+.
1780 *
1781 * [locals]
1782 * An array containing the names of all arguments and local variables as
1783 * symbols.
1784 *
1785 * [params]
1786 * An Hash object containing parameter information.
1787 *
1788 * More info about these values can be found in +vm_core.h+.
1789 *
1790 * [catch_table]
1791 * A list of exceptions and control flow operators (rescue, next, redo,
1792 * break, etc.).
1793 *
1794 * [bytecode]
1795 * An array of arrays containing the instruction names and operands that
1796 * make up the body of the instruction sequence.
1797 *
1798 * Note that this format is MRI specific and version dependent.
1799 *
1800 */
1801static VALUE
1802iseqw_to_a(VALUE self)
1803{
1804 const rb_iseq_t *iseq = iseqw_check(self);
1805 return iseq_data_to_ary(iseq);
1806}
1807
#if VM_INSN_INFO_TABLE_IMPL == 1 /* binary search */
/*
 * Finds the insn info entry for instruction position +pos+ by binary
 * search over the positions table.  On an exact hit that entry is
 * returned; otherwise the entry before the first larger position, or the
 * last entry when +pos+ is beyond all recorded positions.  Index 0 is not
 * part of the searched range, it only serves as the fallback.
 * Returns NULL when the table is empty.
 */
static const struct iseq_insn_info_entry *
get_insn_info_binary_search(const rb_iseq_t *iseq, size_t pos)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    const size_t size = body->insns_info.size;
    const struct iseq_insn_info_entry *const insns_info = body->insns_info.body;
    const unsigned int *const positions = body->insns_info.positions;
    const int debug = 0;
    size_t lo, hi;

    if (debug) {
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, positions[0], insns_info[0].line_no, pos);
    }

    if (size == 0) return NULL;
    if (size == 1) return &insns_info[0];

    lo = 1;
    hi = size - 1;
    while (lo <= hi) {
        const size_t mid = lo + (hi - lo) / 2;

        if (positions[mid] == pos) {
            return &insns_info[mid];
        }
        if (positions[mid] < pos) {
            lo = mid + 1;
        }
        else {
            hi = mid - 1;
        }
    }

    /* no exact hit: lo is the first index whose position exceeds pos */
    if (lo >= size) {
        return &insns_info[size-1];
    }
    if (positions[lo] > pos) {
        return &insns_info[lo-1];
    }
    return &insns_info[lo];
}

/* Lookup used by the rest of the file for this table implementation. */
static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t *iseq, size_t pos)
{
    return get_insn_info_binary_search(iseq, pos);
}
#endif
1860
#if VM_INSN_INFO_TABLE_IMPL == 2 /* succinct bitvector */
/*
 * Finds the insn info entry for instruction position +pos+ through the
 * succinct index table: the entry at succ_index_lookup(table, pos) - 1 is
 * returned.  Tables with zero or one entry are answered without
 * consulting the index (NULL and the only entry, respectively).
 */
static const struct iseq_insn_info_entry *
get_insn_info_succinct_bitvector(const rb_iseq_t *iseq, size_t pos)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    const size_t size = body->insns_info.size;
    const struct iseq_insn_info_entry *const insns_info = body->insns_info.body;
    const int debug = 0;
    int index;

    if (debug) {
#if VM_CHECK_MODE > 0
        const unsigned int *positions = body->insns_info.positions;
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, positions[0], insns_info[0].line_no, pos);
#else
        printf("size: %"PRIuSIZE"\n", size);
        printf("insns_info[%"PRIuSIZE"]: line: %d, pos: %"PRIuSIZE"\n",
               (size_t)0, insns_info[0].line_no, pos);
#endif
    }

    if (size == 0) return NULL;
    if (size == 1) return &insns_info[0];

    VM_ASSERT(body->insns_info.succ_index_table != NULL);
    index = succ_index_lookup(body->insns_info.succ_index_table, (int)pos);
    return &insns_info[index-1];
}

/* Lookup used by the rest of the file for this table implementation. */
static const struct iseq_insn_info_entry *
get_insn_info(const rb_iseq_t *iseq, size_t pos)
{
    return get_insn_info_succinct_bitvector(iseq, pos);
}
#endif
1903
1904#if VM_CHECK_MODE > 0 || VM_INSN_INFO_TABLE_IMPL == 0
1905static const struct iseq_insn_info_entry *
1906get_insn_info_linear_search(const rb_iseq_t *iseq, size_t pos)
1907{
1908 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
1909 size_t i = 0, size = body->insns_info.size;
1910 const struct iseq_insn_info_entry *insns_info = body->insns_info.body;
1911 const unsigned int *positions = body->insns_info.positions;
1912 const int debug = 0;
1913
1914 if (debug) {
1915 printf("size: %"PRIuSIZE"\n", size);
1916 printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1917 i, positions[i], insns_info[i].line_no, pos);
1918 }
1919
1920 if (size == 0) {
1921 return NULL;
1922 }
1923 else if (size == 1) {
1924 return &insns_info[0];
1925 }
1926 else {
1927 for (i=1; i<size; i++) {
1928 if (debug) printf("insns_info[%"PRIuSIZE"]: position: %d, line: %d, pos: %"PRIuSIZE"\n",
1929 i, positions[i], insns_info[i].line_no, pos);
1930
1931 if (positions[i] == pos) {
1932 return &insns_info[i];
1933 }
1934 if (positions[i] > pos) {
1935 return &insns_info[i-1];
1936 }
1937 }
1938 }
1939 return &insns_info[i-1];
1940}
1941#endif
1942
1943#if VM_INSN_INFO_TABLE_IMPL == 0 /* linear search */
1944static const struct iseq_insn_info_entry *
1945get_insn_info(const rb_iseq_t *iseq, size_t pos)
1946{
1947 return get_insn_info_linear_search(iseq, pos);
1948}
1949#endif
1950
#if VM_CHECK_MODE > 0 && VM_INSN_INFO_TABLE_IMPL > 0
/*
 * Cross-checks get_insn_info against the linear search for every position
 * below iseq_size and reports the first disagreement with rb_bug.
 */
static void
validate_get_insn_info(const rb_iseq_t *iseq)
{
    const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
    size_t pos = 0;

    while (pos < body->iseq_size) {
        if (get_insn_info_linear_search(iseq, pos) != get_insn_info(iseq, pos)) {
            rb_bug("validate_get_insn_info: get_insn_info_linear_search(iseq, %"PRIuSIZE") != get_insn_info(iseq, %"PRIuSIZE")", pos, pos);
        }
        pos++;
    }
}
#endif
1964
1965unsigned int
1966rb_iseq_line_no(const rb_iseq_t *iseq, size_t pos)
1967{
1968 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
1969
1970 if (entry) {
1971 return entry->line_no;
1972 }
1973 else {
1974 return 0;
1975 }
1976}
1977
#ifdef USE_ISEQ_NODE_ID
/*
 * Returns the node id recorded for instruction position +pos+, or 0 when
 * no insn info entry is found.
 */
int
rb_iseq_node_id(const rb_iseq_t *iseq, size_t pos)
{
    const struct iseq_insn_info_entry *const info = get_insn_info(iseq, pos);

    if (!info) {
        return 0;
    }
    return info->node_id;
}
#endif
1992
1993MJIT_FUNC_EXPORTED rb_event_flag_t
1994rb_iseq_event_flags(const rb_iseq_t *iseq, size_t pos)
1995{
1996 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pos);
1997 if (entry) {
1998 return entry->events;
1999 }
2000 else {
2001 return 0;
2002 }
2003}
2004
2005void
2006rb_iseq_clear_event_flags(const rb_iseq_t *iseq, size_t pos, rb_event_flag_t reset)
2007{
2008 struct iseq_insn_info_entry *entry = (struct iseq_insn_info_entry *)get_insn_info(iseq, pos);
2009 if (entry) {
2010 entry->events &= ~reset;
2011 if (!(entry->events & iseq->aux.exec.global_trace_events)) {
2012 void rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos);
2013 rb_iseq_trace_flag_cleared(iseq, pos);
2014 }
2015 }
2016}
2017
2018static VALUE
2019local_var_name(const rb_iseq_t *diseq, VALUE level, VALUE op)
2020{
2021 VALUE i;
2022 VALUE name;
2023 ID lid;
2024 int idx;
2025
2026 for (i = 0; i < level; i++) {
2027 diseq = ISEQ_BODY(diseq)->parent_iseq;
2028 }
2029 idx = ISEQ_BODY(diseq)->local_table_size - (int)op - 1;
2030 lid = ISEQ_BODY(diseq)->local_table[idx];
2031 name = rb_id2str(lid);
2032 if (!name) {
2033 name = rb_str_new_cstr("?");
2034 }
2035 else if (!rb_str_symname_p(name)) {
2036 name = rb_str_inspect(name);
2037 }
2038 else {
2039 name = rb_str_dup(name);
2040 }
2041 rb_str_catf(name, "@%d", idx);
2042 return name;
2043}
2044
2045int rb_insn_unified_local_var_level(VALUE);
2046VALUE rb_dump_literal(VALUE lit);
2047
2048VALUE
2049rb_insn_operand_intern(const rb_iseq_t *iseq,
2050 VALUE insn, int op_no, VALUE op,
2051 int len, size_t pos, const VALUE *pnop, VALUE child)
2052{
2053 const char *types = insn_op_types(insn);
2054 char type = types[op_no];
2055 VALUE ret = Qundef;
2056
2057 switch (type) {
2058 case TS_OFFSET: /* LONG */
2059 ret = rb_sprintf("%"PRIdVALUE, (VALUE)(pos + len + op));
2060 break;
2061
2062 case TS_NUM: /* ULONG */
2063 if (insn == BIN(defined) && op_no == 0) {
2064 enum defined_type deftype = (enum defined_type)op;
2065 switch (deftype) {
2066 case DEFINED_FUNC:
2067 ret = rb_fstring_lit("func");
2068 break;
2069 case DEFINED_REF:
2070 ret = rb_fstring_lit("ref");
2071 break;
2072 case DEFINED_CONST_FROM:
2073 ret = rb_fstring_lit("constant-from");
2074 break;
2075 default:
2076 ret = rb_iseq_defined_string(deftype);
2077 break;
2078 }
2079 if (ret) break;
2080 }
2081 else if (insn == BIN(checktype) && op_no == 0) {
2082 const char *type_str = rb_type_str((enum ruby_value_type)op);
2083 if (type_str) {
2084 ret = rb_str_new_cstr(type_str); break;
2085 }
2086 }
2087 ret = rb_sprintf("%"PRIuVALUE, op);
2088 break;
2089
2090 case TS_LINDEX:{
2091 int level;
2092 if (types[op_no+1] == TS_NUM && pnop) {
2093 ret = local_var_name(iseq, *pnop, op - VM_ENV_DATA_SIZE);
2094 }
2095 else if ((level = rb_insn_unified_local_var_level(insn)) >= 0) {
2096 ret = local_var_name(iseq, (VALUE)level, op - VM_ENV_DATA_SIZE);
2097 }
2098 else {
2099 ret = rb_inspect(INT2FIX(op));
2100 }
2101 break;
2102 }
2103 case TS_ID: /* ID (symbol) */
2104 ret = rb_inspect(ID2SYM(op));
2105 break;
2106
2107 case TS_VALUE: /* VALUE */
2108 op = obj_resurrect(op);
2109 if (insn == BIN(defined) && op_no == 1 && FIXNUM_P(op)) {
2110 /* should be DEFINED_REF */
2111 int type = NUM2INT(op);
2112 if (type) {
2113 if (type & 1) {
2114 ret = rb_sprintf(":$%c", (type >> 1));
2115 }
2116 else {
2117 ret = rb_sprintf(":$%d", (type >> 1));
2118 }
2119 break;
2120 }
2121 }
2122 ret = rb_dump_literal(op);
2123 if (CLASS_OF(op) == rb_cISeq) {
2124 if (child) {
2125 rb_ary_push(child, op);
2126 }
2127 }
2128 break;
2129
2130 case TS_ISEQ: /* iseq */
2131 {
2132 if (op) {
2133 const rb_iseq_t *iseq = rb_iseq_check((rb_iseq_t *)op);
2134 ret = ISEQ_BODY(iseq)->location.label;
2135 if (child) {
2136 rb_ary_push(child, (VALUE)iseq);
2137 }
2138 }
2139 else {
2140 ret = rb_str_new2("nil");
2141 }
2142 break;
2143 }
2144
2145 case TS_IC:
2146 {
2147 ret = rb_sprintf("<ic:%"PRIdPTRDIFF" ", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2148 const ID *segments = ((IC)op)->segments;
2149 rb_str_cat2(ret, rb_id2name(*segments++));
2150 while (*segments) {
2151 rb_str_catf(ret, "::%s", rb_id2name(*segments++));
2152 }
2153 rb_str_cat2(ret, ">");
2154 }
2155 break;
2156 case TS_IVC:
2157 case TS_ICVARC:
2158 case TS_ISE:
2159 ret = rb_sprintf("<is:%"PRIdPTRDIFF">", (union iseq_inline_storage_entry *)op - ISEQ_BODY(iseq)->is_entries);
2160 break;
2161
2162 case TS_CALLDATA:
2163 {
2164 struct rb_call_data *cd = (struct rb_call_data *)op;
2165 const struct rb_callinfo *ci = cd->ci;
2166 VALUE ary = rb_ary_new();
2167 ID mid = vm_ci_mid(ci);
2168
2169 if (mid) {
2170 rb_ary_push(ary, rb_sprintf("mid:%"PRIsVALUE, rb_id2str(mid)));
2171 }
2172
2173 rb_ary_push(ary, rb_sprintf("argc:%d", vm_ci_argc(ci)));
2174
2175 if (vm_ci_flag(ci) & VM_CALL_KWARG) {
2176 const struct rb_callinfo_kwarg *kw_args = vm_ci_kwarg(ci);
2177 VALUE kw_ary = rb_ary_new_from_values(kw_args->keyword_len, kw_args->keywords);
2178 rb_ary_push(ary, rb_sprintf("kw:[%"PRIsVALUE"]", rb_ary_join(kw_ary, rb_str_new2(","))));
2179 }
2180
2181 if (vm_ci_flag(ci)) {
2182 VALUE flags = rb_ary_new();
2183# define CALL_FLAG(n) if (vm_ci_flag(ci) & VM_CALL_##n) rb_ary_push(flags, rb_str_new2(#n))
2184 CALL_FLAG(ARGS_SPLAT);
2185 CALL_FLAG(ARGS_BLOCKARG);
2186 CALL_FLAG(FCALL);
2187 CALL_FLAG(VCALL);
2188 CALL_FLAG(ARGS_SIMPLE);
2189 CALL_FLAG(BLOCKISEQ);
2190 CALL_FLAG(TAILCALL);
2191 CALL_FLAG(SUPER);
2192 CALL_FLAG(ZSUPER);
2193 CALL_FLAG(KWARG);
2194 CALL_FLAG(KW_SPLAT);
2195 CALL_FLAG(KW_SPLAT_MUT);
2196 CALL_FLAG(OPT_SEND); /* maybe not reachable */
2197 rb_ary_push(ary, rb_ary_join(flags, rb_str_new2("|")));
2198 }
2199
2200 ret = rb_sprintf("<calldata!%"PRIsVALUE">", rb_ary_join(ary, rb_str_new2(", ")));
2201 }
2202 break;
2203
2204 case TS_CDHASH:
2205 ret = rb_str_new2("<cdhash>");
2206 break;
2207
2208 case TS_FUNCPTR:
2209 {
2210#ifdef HAVE_DLADDR
2211 Dl_info info;
2212 if (dladdr((void *)op, &info) && info.dli_sname) {
2213 ret = rb_str_new_cstr(info.dli_sname);
2214 break;
2215 }
2216#endif
2217 ret = rb_str_new2("<funcptr>");
2218 }
2219 break;
2220
2221 case TS_BUILTIN:
2222 {
2223 const struct rb_builtin_function *bf = (const struct rb_builtin_function *)op;
2224 ret = rb_sprintf("<builtin!%s/%d>",
2225 bf->name, bf->argc);
2226 }
2227 break;
2228
2229 default:
2230 rb_bug("unknown operand type: %c", type);
2231 }
2232 return ret;
2233}
2234
2235static VALUE
2236right_strip(VALUE str)
2237{
2238 const char *beg = RSTRING_PTR(str), *end = RSTRING_END(str);
2239 while (end-- > beg && *end == ' ');
2240 rb_str_set_len(str, end - beg + 1);
2241 return str;
2242}
2243
2248int
2249rb_iseq_disasm_insn(VALUE ret, const VALUE *code, size_t pos,
2250 const rb_iseq_t *iseq, VALUE child)
2251{
2252 VALUE insn = code[pos];
2253 int len = insn_len(insn);
2254 int j;
2255 const char *types = insn_op_types(insn);
2256 VALUE str = rb_str_new(0, 0);
2257 const char *insn_name_buff;
2258
2259 insn_name_buff = insn_name(insn);
2260 if (1) {
2261 extern const int rb_vm_max_insn_name_size;
2262 rb_str_catf(str, "%04"PRIuSIZE" %-*s ", pos, rb_vm_max_insn_name_size, insn_name_buff);
2263 }
2264 else {
2265 rb_str_catf(str, "%04"PRIuSIZE" %-28.*s ", pos,
2266 (int)strcspn(insn_name_buff, "_"), insn_name_buff);
2267 }
2268
2269 for (j = 0; types[j]; j++) {
2270 VALUE opstr = rb_insn_operand_intern(iseq, insn, j, code[pos + j + 1],
2271 len, pos, &code[pos + j + 2],
2272 child);
2273 rb_str_concat(str, opstr);
2274
2275 if (types[j + 1]) {
2276 rb_str_cat2(str, ", ");
2277 }
2278 }
2279
2280 {
2281 unsigned int line_no = rb_iseq_line_no(iseq, pos);
2282 unsigned int prev = pos == 0 ? 0 : rb_iseq_line_no(iseq, pos - 1);
2283 if (line_no && line_no != prev) {
2284 long slen = RSTRING_LEN(str);
2285 slen = (slen > 70) ? 0 : (70 - slen);
2286 str = rb_str_catf(str, "%*s(%4d)", (int)slen, "", line_no);
2287 }
2288 }
2289
2290 {
2291 rb_event_flag_t events = rb_iseq_event_flags(iseq, pos);
2292 if (events) {
2293 str = rb_str_catf(str, "[%s%s%s%s%s%s%s%s%s%s%s]",
2294 events & RUBY_EVENT_LINE ? "Li" : "",
2295 events & RUBY_EVENT_CLASS ? "Cl" : "",
2296 events & RUBY_EVENT_END ? "En" : "",
2297 events & RUBY_EVENT_CALL ? "Ca" : "",
2298 events & RUBY_EVENT_RETURN ? "Re" : "",
2299 events & RUBY_EVENT_C_CALL ? "Cc" : "",
2300 events & RUBY_EVENT_C_RETURN ? "Cr" : "",
2301 events & RUBY_EVENT_B_CALL ? "Bc" : "",
2302 events & RUBY_EVENT_B_RETURN ? "Br" : "",
2303 events & RUBY_EVENT_COVERAGE_LINE ? "Cli" : "",
2304 events & RUBY_EVENT_COVERAGE_BRANCH ? "Cbr" : "");
2305 }
2306 }
2307
2308 right_strip(str);
2309 if (ret) {
2310 rb_str_cat2(str, "\n");
2311 rb_str_concat(ret, str);
2312 }
2313 else {
2314 printf("%.*s\n", (int)RSTRING_LEN(str), RSTRING_PTR(str));
2315 }
2316 return len;
2317}
2318
2319static const char *
2320catch_type(int type)
2321{
2322 switch (type) {
2323 case CATCH_TYPE_RESCUE:
2324 return "rescue";
2325 case CATCH_TYPE_ENSURE:
2326 return "ensure";
2327 case CATCH_TYPE_RETRY:
2328 return "retry";
2329 case CATCH_TYPE_BREAK:
2330 return "break";
2331 case CATCH_TYPE_REDO:
2332 return "redo";
2333 case CATCH_TYPE_NEXT:
2334 return "next";
2335 default:
2336 rb_bug("unknown catch type: %d", type);
2337 return 0;
2338 }
2339}
2340
2341static VALUE
2342iseq_inspect(const rb_iseq_t *iseq)
2343{
2344 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2345 if (!body->location.label) {
2346 return rb_sprintf("#<ISeq: uninitialized>");
2347 }
2348 else {
2349 const rb_code_location_t *loc = &body->location.code_location;
2350 return rb_sprintf("#<ISeq:%"PRIsVALUE"@%"PRIsVALUE":%d (%d,%d)-(%d,%d)>",
2351 body->location.label, rb_iseq_path(iseq),
2352 loc->beg_pos.lineno,
2353 loc->beg_pos.lineno,
2354 loc->beg_pos.column,
2355 loc->end_pos.lineno,
2356 loc->end_pos.column);
2357 }
2358}
2359
2360static const rb_data_type_t tmp_set = {
2361 "tmpset",
2362 {(void (*)(void *))rb_mark_set, (void (*)(void *))st_free_table, 0, 0,},
2363 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
2364};
2365
2366static VALUE
2367rb_iseq_disasm_recursive(const rb_iseq_t *iseq, VALUE indent)
2368{
2369 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2370 VALUE *code;
2371 VALUE str = rb_str_new(0, 0);
2372 VALUE child = rb_ary_hidden_new(3);
2373 unsigned int size;
2374 unsigned int i;
2375 long l;
2376 size_t n;
2377 enum {header_minlen = 72};
2378 st_table *done_iseq = 0;
2379 VALUE done_iseq_wrapper = Qnil;
2380 const char *indent_str;
2381 long indent_len;
2382
2383 size = body->iseq_size;
2384
2385 indent_len = RSTRING_LEN(indent);
2386 indent_str = RSTRING_PTR(indent);
2387
2388 rb_str_cat(str, indent_str, indent_len);
2389 rb_str_cat2(str, "== disasm: ");
2390
2391 rb_str_append(str, iseq_inspect(iseq));
2392 rb_str_catf(str, " (catch: %s)", body->catch_except_p ? "true" : "false");
2393 if ((l = RSTRING_LEN(str) - indent_len) < header_minlen) {
2394 rb_str_modify_expand(str, header_minlen - l);
2395 memset(RSTRING_END(str), '=', header_minlen - l);
2396 }
2397 rb_str_cat2(str, "\n");
2398
2399 /* show catch table information */
2400 if (body->catch_table) {
2401 rb_str_cat(str, indent_str, indent_len);
2402 rb_str_cat2(str, "== catch table\n");
2403 }
2404 if (body->catch_table) {
2405 rb_str_cat_cstr(indent, "| ");
2406 indent_str = RSTRING_PTR(indent);
2407 for (i = 0; i < body->catch_table->size; i++) {
2408 const struct iseq_catch_table_entry *entry =
2409 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2410 rb_str_cat(str, indent_str, indent_len);
2411 rb_str_catf(str,
2412 "| catch type: %-6s st: %04d ed: %04d sp: %04d cont: %04d\n",
2413 catch_type((int)entry->type), (int)entry->start,
2414 (int)entry->end, (int)entry->sp, (int)entry->cont);
2415 if (entry->iseq && !(done_iseq && st_is_member(done_iseq, (st_data_t)entry->iseq))) {
2416 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check(entry->iseq), indent));
2417 if (!done_iseq) {
2418 done_iseq = st_init_numtable();
2419 done_iseq_wrapper = TypedData_Wrap_Struct(0, &tmp_set, done_iseq);
2420 }
2421 st_insert(done_iseq, (st_data_t)entry->iseq, (st_data_t)0);
2422 indent_str = RSTRING_PTR(indent);
2423 }
2424 }
2425 rb_str_resize(indent, indent_len);
2426 indent_str = RSTRING_PTR(indent);
2427 }
2428 if (body->catch_table) {
2429 rb_str_cat(str, indent_str, indent_len);
2430 rb_str_cat2(str, "|-------------------------------------"
2431 "-----------------------------------\n");
2432 }
2433
2434 /* show local table information */
2435 if (body->local_table) {
2436 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
2437 rb_str_cat(str, indent_str, indent_len);
2438 rb_str_catf(str,
2439 "local table (size: %d, argc: %d "
2440 "[opts: %d, rest: %d, post: %d, block: %d, kw: %d@%d, kwrest: %d])\n",
2441 body->local_table_size,
2442 body->param.lead_num,
2443 body->param.opt_num,
2444 body->param.flags.has_rest ? body->param.rest_start : -1,
2445 body->param.post_num,
2446 body->param.flags.has_block ? body->param.block_start : -1,
2447 body->param.flags.has_kw ? keyword->num : -1,
2448 body->param.flags.has_kw ? keyword->required_num : -1,
2449 body->param.flags.has_kwrest ? keyword->rest_start : -1);
2450
2451 for (i = body->local_table_size; i > 0;) {
2452 int li = body->local_table_size - --i - 1;
2453 long width;
2454 VALUE name = local_var_name(iseq, 0, i);
2455 char argi[0x100];
2456 char opti[0x100];
2457
2458 opti[0] = '\0';
2459 if (body->param.flags.has_opt) {
2460 int argc = body->param.lead_num;
2461 int opts = body->param.opt_num;
2462 if (li >= argc && li < argc + opts) {
2463 snprintf(opti, sizeof(opti), "Opt=%"PRIdVALUE,
2464 body->param.opt_table[li - argc]);
2465 }
2466 }
2467
2468 snprintf(argi, sizeof(argi), "%s%s%s%s%s%s", /* arg, opts, rest, post, kwrest, block */
2469 body->param.lead_num > li ? "Arg" : "",
2470 opti,
2471 (body->param.flags.has_rest && body->param.rest_start == li) ? "Rest" : "",
2472 (body->param.flags.has_post && body->param.post_start <= li && li < body->param.post_start + body->param.post_num) ? "Post" : "",
2473 (body->param.flags.has_kwrest && keyword->rest_start == li) ? "Kwrest" : "",
2474 (body->param.flags.has_block && body->param.block_start == li) ? "Block" : "");
2475
2476 rb_str_cat(str, indent_str, indent_len);
2477 rb_str_catf(str, "[%2d] ", i + 1);
2478 width = RSTRING_LEN(str) + 11;
2479 rb_str_append(str, name);
2480 if (*argi) rb_str_catf(str, "<%s>", argi);
2481 if ((width -= RSTRING_LEN(str)) > 0) rb_str_catf(str, "%*s", (int)width, "");
2482 }
2483 rb_str_cat_cstr(right_strip(str), "\n");
2484 }
2485
2486 /* show each line */
2487 code = rb_iseq_original_iseq(iseq);
2488 for (n = 0; n < size;) {
2489 rb_str_cat(str, indent_str, indent_len);
2490 n += rb_iseq_disasm_insn(str, code, n, iseq, child);
2491 }
2492
2493 for (l = 0; l < RARRAY_LEN(child); l++) {
2494 VALUE isv = rb_ary_entry(child, l);
2495 if (done_iseq && st_is_member(done_iseq, (st_data_t)isv)) continue;
2496 rb_str_cat_cstr(str, "\n");
2497 rb_str_concat(str, rb_iseq_disasm_recursive(rb_iseq_check((rb_iseq_t *)isv), indent));
2498 indent_str = RSTRING_PTR(indent);
2499 }
2500 RB_GC_GUARD(done_iseq_wrapper);
2501
2502 return str;
2503}
2504
2505VALUE
2506rb_iseq_disasm(const rb_iseq_t *iseq)
2507{
2508 VALUE str = rb_iseq_disasm_recursive(iseq, rb_str_new(0, 0));
2509 rb_str_resize(str, RSTRING_LEN(str));
2510 return str;
2511}
2512
2513/*
2514 * Estimates the number of instance variables that will be set on
2515 * a given `class` with the initialize method defined in
2516 * `initialize_iseq`
2517 */
2518attr_index_t
2519rb_estimate_iv_count(VALUE klass, const rb_iseq_t * initialize_iseq)
2520{
2521 struct rb_id_table * iv_names = rb_id_table_create(0);
2522
2523 for (unsigned int i = 0; i < ISEQ_BODY(initialize_iseq)->ivc_size; i++) {
2524 IVC cache = (IVC)&ISEQ_BODY(initialize_iseq)->is_entries[i];
2525
2526 if (cache->iv_set_name) {
2527 rb_id_table_insert(iv_names, cache->iv_set_name, Qtrue);
2528 }
2529 }
2530
2531 attr_index_t count = (attr_index_t)rb_id_table_size(iv_names);
2532
2533 VALUE superclass = rb_class_superclass(klass);
2534 count += RCLASS_EXT(superclass)->max_iv_count;
2535
2536 rb_id_table_free(iv_names);
2537
2538 return count;
2539}
2540
2541/*
2542 * call-seq:
2543 * iseq.disasm -> str
2544 * iseq.disassemble -> str
2545 *
2546 * Returns the instruction sequence as a +String+ in human readable form.
2547 *
2548 * puts RubyVM::InstructionSequence.compile('1 + 2').disasm
2549 *
2550 * Produces:
2551 *
2552 * == disasm: <RubyVM::InstructionSequence:<compiled>@<compiled>>==========
2553 * 0000 trace 1 ( 1)
2554 * 0002 putobject 1
2555 * 0004 putobject 2
2556 * 0006 opt_plus <ic:1>
2557 * 0008 leave
2558 */
2559static VALUE
2560iseqw_disasm(VALUE self)
2561{
2562 return rb_iseq_disasm(iseqw_check(self));
2563}
2564
2565static int
2566iseq_iterate_children(const rb_iseq_t *iseq, void (*iter_func)(const rb_iseq_t *child_iseq, void *data), void *data)
2567{
2568 unsigned int i;
2569 VALUE *code = rb_iseq_original_iseq(iseq);
2570 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2571 const rb_iseq_t *child;
2572 VALUE all_children = rb_obj_hide(rb_ident_hash_new());
2573
2574 if (body->catch_table) {
2575 for (i = 0; i < body->catch_table->size; i++) {
2576 const struct iseq_catch_table_entry *entry =
2577 UNALIGNED_MEMBER_PTR(body->catch_table, entries[i]);
2578 child = entry->iseq;
2579 if (child) {
2580 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
2581 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2582 (*iter_func)(child, data);
2583 }
2584 }
2585 }
2586 }
2587
2588 for (i=0; i<body->iseq_size;) {
2589 VALUE insn = code[i];
2590 int len = insn_len(insn);
2591 const char *types = insn_op_types(insn);
2592 int j;
2593
2594 for (j=0; types[j]; j++) {
2595 switch (types[j]) {
2596 case TS_ISEQ:
2597 child = (const rb_iseq_t *)code[i+j+1];
2598 if (child) {
2599 if (NIL_P(rb_hash_aref(all_children, (VALUE)child))) {
2600 rb_hash_aset(all_children, (VALUE)child, Qtrue);
2601 (*iter_func)(child, data);
2602 }
2603 }
2604 break;
2605 default:
2606 break;
2607 }
2608 }
2609 i += len;
2610 }
2611
2612 return (int)RHASH_SIZE(all_children);
2613}
2614
2615static void
2616yield_each_children(const rb_iseq_t *child_iseq, void *data)
2617{
2618 rb_yield(iseqw_new(child_iseq));
2619}
2620
2621/*
2622 * call-seq:
2623 * iseq.each_child{|child_iseq| ...} -> iseq
2624 *
2625 * Iterate all direct child instruction sequences.
2626 * Iteration order is implementation/version defined
2627 * so that people should not rely on the order.
2628 */
2629static VALUE
2630iseqw_each_child(VALUE self)
2631{
2632 const rb_iseq_t *iseq = iseqw_check(self);
2633 iseq_iterate_children(iseq, yield_each_children, NULL);
2634 return self;
2635}
2636
2637static void
2638push_event_info(const rb_iseq_t *iseq, rb_event_flag_t events, int line, VALUE ary)
2639{
2640#define C(ev, cstr, l) if (events & ev) rb_ary_push(ary, rb_ary_new_from_args(2, l, ID2SYM(rb_intern(cstr))));
2641 C(RUBY_EVENT_CLASS, "class", rb_iseq_first_lineno(iseq));
2642 C(RUBY_EVENT_CALL, "call", rb_iseq_first_lineno(iseq));
2643 C(RUBY_EVENT_B_CALL, "b_call", rb_iseq_first_lineno(iseq));
2644 C(RUBY_EVENT_LINE, "line", INT2FIX(line));
2645 C(RUBY_EVENT_END, "end", INT2FIX(line));
2646 C(RUBY_EVENT_RETURN, "return", INT2FIX(line));
2647 C(RUBY_EVENT_B_RETURN, "b_return", INT2FIX(line));
2648#undef C
2649}
2650
2651/*
2652 * call-seq:
2653 * iseq.trace_points -> ary
2654 *
2655 * Return trace points in the instruction sequence.
2656 * Return an array of [line, event_symbol] pair.
2657 */
2658static VALUE
2659iseqw_trace_points(VALUE self)
2660{
2661 const rb_iseq_t *iseq = iseqw_check(self);
2662 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
2663 unsigned int i;
2664 VALUE ary = rb_ary_new();
2665
2666 for (i=0; i<body->insns_info.size; i++) {
2667 const struct iseq_insn_info_entry *entry = &body->insns_info.body[i];
2668 if (entry->events) {
2669 push_event_info(iseq, entry->events, entry->line_no, ary);
2670 }
2671 }
2672 return ary;
2673}
2674
2675/*
2676 * Returns the instruction sequence containing the given proc or method.
2677 *
2678 * For example, using irb:
2679 *
2680 * # a proc
2681 * > p = proc { num = 1 + 2 }
2682 * > RubyVM::InstructionSequence.of(p)
2683 * > #=> <RubyVM::InstructionSequence:block in irb_binding@(irb)>
2684 *
2685 * # for a method
2686 * > def foo(bar); puts bar; end
2687 * > RubyVM::InstructionSequence.of(method(:foo))
2688 * > #=> <RubyVM::InstructionSequence:foo@(irb)>
2689 *
2690 * Using ::compile_file:
2691 *
2692 * # /tmp/iseq_of.rb
2693 * def hello
2694 * puts "hello, world"
2695 * end
2696 *
2697 * $a_global_proc = proc { str = 'a' + 'b' }
2698 *
2699 * # in irb
2700 * > require '/tmp/iseq_of.rb'
2701 *
2702 * # first the method hello
2703 * > RubyVM::InstructionSequence.of(method(:hello))
2704 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7cb1d0>
2705 *
2706 * # then the global proc
2707 * > RubyVM::InstructionSequence.of($a_global_proc)
2708 * > #=> #<RubyVM::InstructionSequence:0x007fb73d7caf78>
2709 */
2710static VALUE
2711iseqw_s_of(VALUE klass, VALUE body)
2712{
2713 const rb_iseq_t *iseq = NULL;
2714
2715 if (rb_obj_is_proc(body)) {
2716 iseq = vm_proc_iseq(body);
2717
2718 if (!rb_obj_is_iseq((VALUE)iseq)) {
2719 iseq = NULL;
2720 }
2721 }
2722 else if (rb_obj_is_method(body)) {
2723 iseq = rb_method_iseq(body);
2724 }
2725 else if (rb_typeddata_is_instance_of(body, &iseqw_data_type)) {
2726 return body;
2727 }
2728
2729 return iseq ? iseqw_new(iseq) : Qnil;
2730}
2731
2732/*
2733 * call-seq:
2734 * InstructionSequence.disasm(body) -> str
2735 * InstructionSequence.disassemble(body) -> str
2736 *
2737 * Takes +body+, a Method or Proc object, and returns a String with the
2738 * human readable instructions for +body+.
2739 *
2740 * For a Method object:
2741 *
2742 * # /tmp/method.rb
2743 * def hello
2744 * puts "hello, world"
2745 * end
2746 *
2747 * puts RubyVM::InstructionSequence.disasm(method(:hello))
2748 *
2749 * Produces:
2750 *
2751 * == disasm: <RubyVM::InstructionSequence:hello@/tmp/method.rb>============
2752 * 0000 trace 8 ( 1)
2753 * 0002 trace 1 ( 2)
2754 * 0004 putself
2755 * 0005 putstring "hello, world"
2756 * 0007 send :puts, 1, nil, 8, <ic:0>
2757 * 0013 trace 16 ( 3)
2758 * 0015 leave ( 2)
2759 *
2760 * For a Proc:
2761 *
2762 * # /tmp/proc.rb
2763 * p = proc { num = 1 + 2 }
2764 * puts RubyVM::InstructionSequence.disasm(p)
2765 *
2766 * Produces:
2767 *
2768 * == disasm: <RubyVM::InstructionSequence:block in <main>@/tmp/proc.rb>===
2769 * == catch table
2770 * | catch type: redo st: 0000 ed: 0012 sp: 0000 cont: 0000
2771 * | catch type: next st: 0000 ed: 0012 sp: 0000 cont: 0012
2772 * |------------------------------------------------------------------------
2773 * local table (size: 2, argc: 0 [opts: 0, rest: -1, post: 0, block: -1] s1)
2774 * [ 2] num
2775 * 0000 trace 1 ( 1)
2776 * 0002 putobject 1
2777 * 0004 putobject 2
2778 * 0006 opt_plus <ic:1>
2779 * 0008 dup
2780 * 0009 setlocal num, 0
2781 * 0012 leave
2782 *
2783 */
2784static VALUE
2785iseqw_s_disasm(VALUE klass, VALUE body)
2786{
2787 VALUE iseqw = iseqw_s_of(klass, body);
2788 return NIL_P(iseqw) ? Qnil : rb_iseq_disasm(iseqw_check(iseqw));
2789}
2790
/*
 * Returns the name of the node type +node+.
 *
 * The cases of the switch come from the included file "node_name.inc";
 * a value not covered there is reported through rb_bug().
 */
const char *
ruby_node_name(int node)
{
    switch (node) {
#include "node_name.inc"
      default:
        rb_bug("unknown node: %d", node);
        return 0;
    }
}
2801
2802static VALUE
2803register_label(struct st_table *table, unsigned long idx)
2804{
2805 VALUE sym = rb_str_intern(rb_sprintf("label_%lu", idx));
2806 st_insert(table, idx, sym);
2807 return sym;
2808}
2809
2810static VALUE
2811exception_type2symbol(VALUE type)
2812{
2813 ID id;
2814 switch (type) {
2815 case CATCH_TYPE_RESCUE: CONST_ID(id, "rescue"); break;
2816 case CATCH_TYPE_ENSURE: CONST_ID(id, "ensure"); break;
2817 case CATCH_TYPE_RETRY: CONST_ID(id, "retry"); break;
2818 case CATCH_TYPE_BREAK: CONST_ID(id, "break"); break;
2819 case CATCH_TYPE_REDO: CONST_ID(id, "redo"); break;
2820 case CATCH_TYPE_NEXT: CONST_ID(id, "next"); break;
2821 default:
2822 rb_bug("unknown exception type: %d", (int)type);
2823 }
2824 return ID2SYM(id);
2825}
2826
2827static int
2828cdhash_each(VALUE key, VALUE value, VALUE ary)
2829{
2830 rb_ary_push(ary, obj_resurrect(key));
2831 rb_ary_push(ary, value);
2832 return ST_CONTINUE;
2833}
2834
2835static const rb_data_type_t label_wrapper = {
2836 "label_wrapper",
2837 {(void (*)(void *))rb_mark_tbl, (void (*)(void *))st_free_table, 0, 0,},
2838 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
2839};
2840
/* Declares a static ID variable named id_<name>. */
#define DECL_ID(name) static ID id_##name

/* Assigns id_<name> the ID interned from the spelling of <name>. */
#define INIT_ID(name) id_##name = rb_intern(#name)
2846
2847static VALUE
2848iseq_type_id(enum rb_iseq_type type)
2849{
2850 DECL_ID(top);
2851 DECL_ID(method);
2852 DECL_ID(block);
2853 DECL_ID(class);
2854 DECL_ID(rescue);
2855 DECL_ID(ensure);
2856 DECL_ID(eval);
2857 DECL_ID(main);
2858 DECL_ID(plain);
2859
2860 if (id_top == 0) {
2861 INIT_ID(top);
2862 INIT_ID(method);
2863 INIT_ID(block);
2864 INIT_ID(class);
2865 INIT_ID(rescue);
2866 INIT_ID(ensure);
2867 INIT_ID(eval);
2868 INIT_ID(main);
2869 INIT_ID(plain);
2870 }
2871
2872 switch (type) {
2873 case ISEQ_TYPE_TOP: return id_top;
2874 case ISEQ_TYPE_METHOD: return id_method;
2875 case ISEQ_TYPE_BLOCK: return id_block;
2876 case ISEQ_TYPE_CLASS: return id_class;
2877 case ISEQ_TYPE_RESCUE: return id_rescue;
2878 case ISEQ_TYPE_ENSURE: return id_ensure;
2879 case ISEQ_TYPE_EVAL: return id_eval;
2880 case ISEQ_TYPE_MAIN: return id_main;
2881 case ISEQ_TYPE_PLAIN: return id_plain;
2882 };
2883
2884 rb_bug("unsupported iseq type: %d", (int)type);
2885}
2886
/*
 * Dumps +iseq+ into a plain Ruby Array (the
 * "YARVInstructionSequence/SimpleDataFormat" layout assembled at the end
 * of this function).
 *
 * The work is done in these steps:
 *   1. type, locals and params are collected from the iseq body;
 *   2. every instruction is turned into [:insn_name, operand, ...] and
 *      every jump target met on the way is registered in labels_table;
 *   3. the catch table is turned into the +exception+ array (registering
 *      more labels);
 *   4. the instruction arrays are copied into the final body, interleaved
 *      with label symbols, line numbers and event symbols.
 *
 * Iseqs referenced by TS_ISEQ operands or catch table entries are dumped
 * by calling this function recursively.
 */
static VALUE
iseq_data_to_ary(const rb_iseq_t *iseq)
{
    unsigned int i;
    long l;
    const struct rb_iseq_constant_body *const iseq_body = ISEQ_BODY(iseq);
    const struct iseq_insn_info_entry *prev_insn_info;
    unsigned int pos;
    int last_line = 0;
    VALUE *seq, *iseq_original;

    VALUE val = rb_ary_new();
    ID type; /* ID of the iseq type; wrapped with ID2SYM() when stored */
    VALUE locals = rb_ary_new();
    VALUE params = rb_hash_new();
    VALUE body = rb_ary_new(); /* [[:insn1, ...], ...] */
    VALUE nbody;
    VALUE exception = rb_ary_new(); /* [[....]] */
    VALUE misc = rb_hash_new();

    static ID insn_syms[VM_INSTRUCTION_SIZE/2]; /* w/o-trace only */
    /* position -> label Symbol; filled by register_label() */
    struct st_table *labels_table = st_init_numtable();
    /* wrapper whose descriptor (label_wrapper) frees labels_table */
    VALUE labels_wrapper = TypedData_Wrap_Struct(0, &label_wrapper, labels_table);

    /* intern the instruction names once; insn_syms is static */
    if (insn_syms[0] == 0) {
        int i;
        for (i=0; i<numberof(insn_syms); i++) {
            insn_syms[i] = rb_intern(insn_name(i));
        }
    }

    /* type */
    type = iseq_type_id(iseq_body->type);

    /* locals */
    for (i=0; i<iseq_body->local_table_size; i++) {
        ID lid = iseq_body->local_table[i];
        if (lid) {
            if (rb_id2str(lid)) {
                rb_ary_push(locals, ID2SYM(lid));
            }
            else { /* hidden variable from id_internal() */
                /* no printable name: a number derived from the slot is stored instead */
                rb_ary_push(locals, ULONG2NUM(iseq_body->local_table_size-i+1));
            }
        }
        else {
            /* a zero ID is dumped as :"#arg_rest" */
            rb_ary_push(locals, ID2SYM(rb_intern("#arg_rest")));
        }
    }

    /* params */
    {
        const struct rb_iseq_param_keyword *const keyword = iseq_body->param.keyword;
        int j;

        if (iseq_body->param.flags.has_opt) {
            /* opt_num + 1 entries of opt_table are dumped, each as a label */
            int len = iseq_body->param.opt_num + 1;
            VALUE arg_opt_labels = rb_ary_new2(len);

            for (j = 0; j < len; j++) {
                VALUE l = register_label(labels_table, iseq_body->param.opt_table[j]);
                rb_ary_push(arg_opt_labels, l);
            }
            rb_hash_aset(params, ID2SYM(rb_intern("opt")), arg_opt_labels);
        }

        /* commit */
        if (iseq_body->param.flags.has_lead) rb_hash_aset(params, ID2SYM(rb_intern("lead_num")), INT2FIX(iseq_body->param.lead_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_num")), INT2FIX(iseq_body->param.post_num));
        if (iseq_body->param.flags.has_post) rb_hash_aset(params, ID2SYM(rb_intern("post_start")), INT2FIX(iseq_body->param.post_start));
        if (iseq_body->param.flags.has_rest) rb_hash_aset(params, ID2SYM(rb_intern("rest_start")), INT2FIX(iseq_body->param.rest_start));
        if (iseq_body->param.flags.has_block) rb_hash_aset(params, ID2SYM(rb_intern("block_start")), INT2FIX(iseq_body->param.block_start));
        if (iseq_body->param.flags.has_kw) {
            VALUE keywords = rb_ary_new();
            int i, j;
            /* the first required_num keywords are dumped as bare symbols */
            for (i=0; i<keyword->required_num; i++) {
                rb_ary_push(keywords, ID2SYM(keyword->table[i]));
            }
            /* the remaining keywords are dumped as [name] or [name, default];
             * i keeps running over table[], j indexes default_values[] */
            for (j=0; i<keyword->num; i++, j++) {
                VALUE key = rb_ary_new_from_args(1, ID2SYM(keyword->table[i]));
                if (!UNDEF_P(keyword->default_values[j])) {
                    rb_ary_push(key, keyword->default_values[j]);
                }
                rb_ary_push(keywords, key);
            }

            rb_hash_aset(params, ID2SYM(rb_intern("kwbits")),
                         INT2FIX(keyword->bits_start));
            rb_hash_aset(params, ID2SYM(rb_intern("keyword")), keywords);
        }
        if (iseq_body->param.flags.has_kwrest) rb_hash_aset(params, ID2SYM(rb_intern("kwrest")), INT2FIX(keyword->rest_start));
        if (iseq_body->param.flags.ambiguous_param0) rb_hash_aset(params, ID2SYM(rb_intern("ambiguous_param0")), Qtrue);
    }

    /* body */
    iseq_original = rb_iseq_original_iseq((rb_iseq_t *)iseq);

    /* one iteration per instruction; seq is advanced past the opcode here
     * and past each operand by the inner loop */
    for (seq = iseq_original; seq < iseq_original + iseq_body->iseq_size; ) {
        VALUE insn = *seq++;
        int j, len = insn_len(insn);
        VALUE *nseq = seq + len - 1; /* start of the next instruction */
        VALUE ary = rb_ary_new2(len);

        rb_ary_push(ary, ID2SYM(insn_syms[insn%numberof(insn_syms)]));
        for (j=0; j<len-1; j++, seq++) {
            enum ruby_insn_type_chars op_type = insn_op_type(insn, j);

            switch (op_type) {
              case TS_OFFSET: {
                /* the operand is added to the position of the next
                 * instruction to obtain the target position */
                unsigned long idx = nseq - iseq_original + *seq;
                rb_ary_push(ary, register_label(labels_table, idx));
                break;
              }
              case TS_LINDEX:
              case TS_NUM:
                rb_ary_push(ary, INT2FIX(*seq));
                break;
              case TS_VALUE:
                rb_ary_push(ary, obj_resurrect(*seq));
                break;
              case TS_ISEQ:
                {
                    /* note: this iseq shadows the function parameter */
                    const rb_iseq_t *iseq = (rb_iseq_t *)*seq;
                    if (iseq) {
                        VALUE val = iseq_data_to_ary(rb_iseq_check(iseq));
                        rb_ary_push(ary, val);
                    }
                    else {
                        rb_ary_push(ary, Qnil);
                    }
                }
                break;
              case TS_IC:
                {
                    /* dump the zero-terminated list of IDs in segments */
                    VALUE list = rb_ary_new();
                    const ID *ids = ((IC)*seq)->segments;
                    while (*ids) {
                        rb_ary_push(list, ID2SYM(*ids++));
                    }
                    rb_ary_push(ary, list);
                }
                break;
              case TS_IVC:
              case TS_ICVARC:
              case TS_ISE:
                {
                    /* dumped as the entry's index relative to
                     * ISEQ_IS_ENTRY_START() for this operand type */
                    union iseq_inline_storage_entry *is = (union iseq_inline_storage_entry *)*seq;
                    rb_ary_push(ary, INT2FIX(is - ISEQ_IS_ENTRY_START(ISEQ_BODY(iseq), op_type)));
                }
                break;
              case TS_CALLDATA:
                {
                    /* dumped as a Hash with :mid, :flag, optionally
                     * :kw_arg, and :orig_argc */
                    struct rb_call_data *cd = (struct rb_call_data *)*seq;
                    const struct rb_callinfo *ci = cd->ci;
                    VALUE e = rb_hash_new();
                    int argc = vm_ci_argc(ci);

                    ID mid = vm_ci_mid(ci);
                    rb_hash_aset(e, ID2SYM(rb_intern("mid")), mid ? ID2SYM(mid) : Qnil);
                    rb_hash_aset(e, ID2SYM(rb_intern("flag")), UINT2NUM(vm_ci_flag(ci)));

                    if (vm_ci_flag(ci) & VM_CALL_KWARG) {
                        const struct rb_callinfo_kwarg *kwarg = vm_ci_kwarg(ci);
                        int i;
                        VALUE kw = rb_ary_new2((long)kwarg->keyword_len);

                        /* the keyword count is subtracted before argc is
                         * stored as :orig_argc */
                        argc -= kwarg->keyword_len;
                        for (i = 0; i < kwarg->keyword_len; i++) {
                            rb_ary_push(kw, kwarg->keywords[i]);
                        }
                        rb_hash_aset(e, ID2SYM(rb_intern("kw_arg")), kw);
                    }

                    rb_hash_aset(e, ID2SYM(rb_intern("orig_argc")),
                                 INT2FIX(argc));
                    rb_ary_push(ary, e);
                }
                break;
              case TS_ID:
                rb_ary_push(ary, ID2SYM(*seq));
                break;
              case TS_CDHASH:
                {
                    /* flatten the hash to [key, value, ...] and replace
                     * each value by the label of the position it refers to */
                    VALUE hash = *seq;
                    VALUE val = rb_ary_new();
                    int i;

                    rb_hash_foreach(hash, cdhash_each, val);

                    for (i=0; i<RARRAY_LEN(val); i+=2) {
                        /* note: this pos shadows the function-level pos */
                        VALUE pos = FIX2INT(rb_ary_entry(val, i+1));
                        unsigned long idx = nseq - iseq_original + pos;

                        rb_ary_store(val, i+1,
                                     register_label(labels_table, idx));
                    }
                    rb_ary_push(ary, val);
                }
                break;
              case TS_FUNCPTR:
                {
                    /* the pointer value itself is dumped as an Integer */
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE val = LONG2NUM((SIGNED_VALUE)*seq);
#else
                    VALUE val = LL2NUM((SIGNED_VALUE)*seq);
#endif
                    rb_ary_push(ary, val);
                }
                break;
              case TS_BUILTIN:
                {
                    /* dumped as a Hash with :func_ptr, :argc, :index and :name */
                    VALUE val = rb_hash_new();
#if SIZEOF_VALUE <= SIZEOF_LONG
                    VALUE func_ptr = LONG2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#else
                    VALUE func_ptr = LL2NUM((SIGNED_VALUE)((RB_BUILTIN)*seq)->func_ptr);
#endif
                    rb_hash_aset(val, ID2SYM(rb_intern("func_ptr")), func_ptr);
                    rb_hash_aset(val, ID2SYM(rb_intern("argc")), INT2NUM(((RB_BUILTIN)*seq)->argc));
                    rb_hash_aset(val, ID2SYM(rb_intern("index")), INT2NUM(((RB_BUILTIN)*seq)->index));
                    rb_hash_aset(val, ID2SYM(rb_intern("name")), rb_str_new_cstr(((RB_BUILTIN)*seq)->name));
                    rb_ary_push(ary, val);
                }
                break;
              default:
                rb_bug("unknown operand: %c", insn_op_type(insn, j));
            }
        }
        rb_ary_push(body, ary);
    }

    /* keep the per-instruction arrays; body is rebuilt below */
    nbody = body;

    /* exception */
    if (iseq_body->catch_table) for (i=0; i<iseq_body->catch_table->size; i++) {
        /* each entry becomes [type, iseq-or-nil, start, end, cont, sp] */
        VALUE ary = rb_ary_new();
        const struct iseq_catch_table_entry *entry =
            UNALIGNED_MEMBER_PTR(iseq_body->catch_table, entries[i]);
        rb_ary_push(ary, exception_type2symbol(entry->type));
        if (entry->iseq) {
            rb_ary_push(ary, iseq_data_to_ary(rb_iseq_check(entry->iseq)));
        }
        else {
            rb_ary_push(ary, Qnil);
        }
        rb_ary_push(ary, register_label(labels_table, entry->start));
        rb_ary_push(ary, register_label(labels_table, entry->end));
        rb_ary_push(ary, register_label(labels_table, entry->cont));
        rb_ary_push(ary, UINT2NUM(entry->sp));
        rb_ary_push(exception, ary);
    }

    /* make body with labels and insert line number */
    body = rb_ary_new();
    prev_insn_info = NULL;
#ifdef USE_ISEQ_NODE_ID
    VALUE node_ids = rb_ary_new();
#endif

    /* pos follows the instruction position: every array in nbody holds the
     * instruction name plus one element per operand */
    for (l=0, pos=0; l<RARRAY_LEN(nbody); l++) {
        const struct iseq_insn_info_entry *info;
        VALUE ary = RARRAY_AREF(nbody, l);
        st_data_t label;

        /* a label registered for this position goes before the instruction */
        if (st_lookup(labels_table, pos, &label)) {
            rb_ary_push(body, (VALUE)label);
        }

        info = get_insn_info(iseq, pos);
#ifdef USE_ISEQ_NODE_ID
        rb_ary_push(node_ids, INT2FIX(info->node_id));
#endif

        /* line number and events are emitted only when the info entry changes */
        if (prev_insn_info != info) {
            int line = info->line_no;
            rb_event_flag_t events = info->events;

            if (line > 0 && last_line != line) {
                rb_ary_push(body, INT2FIX(line));
                last_line = line;
            }
#define CHECK_EVENT(ev) if (events & ev) rb_ary_push(body, ID2SYM(rb_intern(#ev)));
            CHECK_EVENT(RUBY_EVENT_LINE);
            CHECK_EVENT(RUBY_EVENT_CLASS);
            CHECK_EVENT(RUBY_EVENT_END);
            CHECK_EVENT(RUBY_EVENT_CALL);
            CHECK_EVENT(RUBY_EVENT_RETURN);
            CHECK_EVENT(RUBY_EVENT_B_CALL);
            CHECK_EVENT(RUBY_EVENT_B_RETURN);
#undef CHECK_EVENT
            prev_insn_info = info;
        }

        rb_ary_push(body, ary);
        pos += RARRAY_LENINT(ary); /* reject too huge data */
    }
    RB_GC_GUARD(nbody);
    RB_GC_GUARD(labels_wrapper);

    rb_hash_aset(misc, ID2SYM(rb_intern("arg_size")), INT2FIX(iseq_body->param.size));
    rb_hash_aset(misc, ID2SYM(rb_intern("local_size")), INT2FIX(iseq_body->local_table_size));
    rb_hash_aset(misc, ID2SYM(rb_intern("stack_max")), INT2FIX(iseq_body->stack_max));
    rb_hash_aset(misc, ID2SYM(rb_intern("node_id")), INT2FIX(iseq_body->location.node_id));
    rb_hash_aset(misc, ID2SYM(rb_intern("code_location")),
            rb_ary_new_from_args(4,
                INT2FIX(iseq_body->location.code_location.beg_pos.lineno),
                INT2FIX(iseq_body->location.code_location.beg_pos.column),
                INT2FIX(iseq_body->location.code_location.end_pos.lineno),
                INT2FIX(iseq_body->location.code_location.end_pos.column)));
#ifdef USE_ISEQ_NODE_ID
    rb_hash_aset(misc, ID2SYM(rb_intern("node_ids")), node_ids);
#endif

    /*
     * [:magic, :major_version, :minor_version, :format_type, :misc,
     *  :name, :path, :absolute_path, :start_lineno, :type, :locals, :args,
     *  :catch_table, :bytecode]
     */
    rb_ary_push(val, rb_str_new2("YARVInstructionSequence/SimpleDataFormat"));
    rb_ary_push(val, INT2FIX(ISEQ_MAJOR_VERSION)); /* major */
    rb_ary_push(val, INT2FIX(ISEQ_MINOR_VERSION)); /* minor */
    rb_ary_push(val, INT2FIX(1)); /* format_type */
    rb_ary_push(val, misc);
    rb_ary_push(val, iseq_body->location.label);
    rb_ary_push(val, rb_iseq_path(iseq));
    rb_ary_push(val, rb_iseq_realpath(iseq));
    rb_ary_push(val, RB_INT2NUM(iseq_body->location.first_lineno));
    rb_ary_push(val, ID2SYM(type));
    rb_ary_push(val, locals);
    rb_ary_push(val, params);
    rb_ary_push(val, exception);
    rb_ary_push(val, body);
    return val;
}
3221
3222VALUE
3223rb_iseq_parameters(const rb_iseq_t *iseq, int is_proc)
3224{
3225 int i, r;
3226 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3227 const struct rb_iseq_param_keyword *const keyword = body->param.keyword;
3228 VALUE a, args = rb_ary_new2(body->param.size);
3229 ID req, opt, rest, block, key, keyrest;
3230#define PARAM_TYPE(type) rb_ary_push(a = rb_ary_new2(2), ID2SYM(type))
3231#define PARAM_ID(i) body->local_table[(i)]
3232#define PARAM(i, type) ( \
3233 PARAM_TYPE(type), \
3234 rb_id2str(PARAM_ID(i)) ? \
3235 rb_ary_push(a, ID2SYM(PARAM_ID(i))) : \
3236 a)
3237
3238 CONST_ID(req, "req");
3239 CONST_ID(opt, "opt");
3240 if (is_proc) {
3241 for (i = 0; i < body->param.lead_num; i++) {
3242 PARAM_TYPE(opt);
3243 rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil);
3244 rb_ary_push(args, a);
3245 }
3246 }
3247 else {
3248 for (i = 0; i < body->param.lead_num; i++) {
3249 rb_ary_push(args, PARAM(i, req));
3250 }
3251 }
3252 r = body->param.lead_num + body->param.opt_num;
3253 for (; i < r; i++) {
3254 PARAM_TYPE(opt);
3255 if (rb_id2str(PARAM_ID(i))) {
3256 rb_ary_push(a, ID2SYM(PARAM_ID(i)));
3257 }
3258 rb_ary_push(args, a);
3259 }
3260 if (body->param.flags.has_rest) {
3261 CONST_ID(rest, "rest");
3262 rb_ary_push(args, PARAM(body->param.rest_start, rest));
3263 }
3264 r = body->param.post_start + body->param.post_num;
3265 if (is_proc) {
3266 for (i = body->param.post_start; i < r; i++) {
3267 PARAM_TYPE(opt);
3268 rb_ary_push(a, rb_id2str(PARAM_ID(i)) ? ID2SYM(PARAM_ID(i)) : Qnil);
3269 rb_ary_push(args, a);
3270 }
3271 }
3272 else {
3273 for (i = body->param.post_start; i < r; i++) {
3274 rb_ary_push(args, PARAM(i, req));
3275 }
3276 }
3277 if (body->param.flags.accepts_no_kwarg) {
3278 ID nokey;
3279 CONST_ID(nokey, "nokey");
3280 PARAM_TYPE(nokey);
3281 rb_ary_push(args, a);
3282 }
3283 if (body->param.flags.has_kw) {
3284 i = 0;
3285 if (keyword->required_num > 0) {
3286 ID keyreq;
3287 CONST_ID(keyreq, "keyreq");
3288 for (; i < keyword->required_num; i++) {
3289 PARAM_TYPE(keyreq);
3290 if (rb_id2str(keyword->table[i])) {
3291 rb_ary_push(a, ID2SYM(keyword->table[i]));
3292 }
3293 rb_ary_push(args, a);
3294 }
3295 }
3296 CONST_ID(key, "key");
3297 for (; i < keyword->num; i++) {
3298 PARAM_TYPE(key);
3299 if (rb_id2str(keyword->table[i])) {
3300 rb_ary_push(a, ID2SYM(keyword->table[i]));
3301 }
3302 rb_ary_push(args, a);
3303 }
3304 }
3305 if (body->param.flags.has_kwrest || body->param.flags.ruby2_keywords) {
3306 ID param;
3307 CONST_ID(keyrest, "keyrest");
3308 PARAM_TYPE(keyrest);
3309 if (body->param.flags.has_kwrest &&
3310 rb_id2str(param = PARAM_ID(keyword->rest_start))) {
3311 rb_ary_push(a, ID2SYM(param));
3312 }
3313 else if (body->param.flags.ruby2_keywords) {
3314 rb_ary_push(a, ID2SYM(idPow));
3315 }
3316 rb_ary_push(args, a);
3317 }
3318 if (body->param.flags.has_block) {
3319 CONST_ID(block, "block");
3320 rb_ary_push(args, PARAM(body->param.block_start, block));
3321 }
3322 return args;
3323}
3324
3325VALUE
3326rb_iseq_defined_string(enum defined_type type)
3327{
3328 static const char expr_names[][18] = {
3329 "nil",
3330 "instance-variable",
3331 "local-variable",
3332 "global-variable",
3333 "class variable",
3334 "constant",
3335 "method",
3336 "yield",
3337 "super",
3338 "self",
3339 "true",
3340 "false",
3341 "assignment",
3342 "expression",
3343 };
3344 const char *estr;
3345
3346 if ((unsigned)(type - 1) >= (unsigned)numberof(expr_names)) rb_bug("unknown defined type %d", type);
3347 estr = expr_names[type - 1];
3348 return rb_fstring_cstr(estr);
3349}
3350
3351/* A map from encoded_insn to insn_data: decoded insn number, its len,
3352 * non-trace version of encoded insn, and trace version. */
3353
3354static st_table *encoded_insn_data;
3355typedef struct insn_data_struct {
3356 int insn;
3357 int insn_len;
3358 void *notrace_encoded_insn;
3359 void *trace_encoded_insn;
3360} insn_data_t;
3361static insn_data_t insn_data[VM_INSTRUCTION_SIZE/2];
3362
3363void
3364rb_vm_encoded_insn_data_table_init(void)
3365{
3366#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3367 const void * const *table = rb_vm_get_insns_address_table();
3368#define INSN_CODE(insn) ((VALUE)table[insn])
3369#else
3370#define INSN_CODE(insn) (insn)
3371#endif
3372 st_data_t insn;
3373 encoded_insn_data = st_init_numtable_with_size(VM_INSTRUCTION_SIZE / 2);
3374
3375 for (insn = 0; insn < VM_INSTRUCTION_SIZE/2; insn++) {
3376 st_data_t key1 = (st_data_t)INSN_CODE(insn);
3377 st_data_t key2 = (st_data_t)INSN_CODE(insn + VM_INSTRUCTION_SIZE/2);
3378
3379 insn_data[insn].insn = (int)insn;
3380 insn_data[insn].insn_len = insn_len(insn);
3381
3382 if (insn != BIN(opt_invokebuiltin_delegate_leave)) {
3383 insn_data[insn].notrace_encoded_insn = (void *) key1;
3384 insn_data[insn].trace_encoded_insn = (void *) key2;
3385 }
3386 else {
3387 insn_data[insn].notrace_encoded_insn = (void *) INSN_CODE(BIN(opt_invokebuiltin_delegate));
3388 insn_data[insn].trace_encoded_insn = (void *) INSN_CODE(BIN(opt_invokebuiltin_delegate) + VM_INSTRUCTION_SIZE/2);
3389 }
3390
3391 st_add_direct(encoded_insn_data, key1, (st_data_t)&insn_data[insn]);
3392 st_add_direct(encoded_insn_data, key2, (st_data_t)&insn_data[insn]);
3393 }
3394}
3395
3396int
3397rb_vm_insn_addr2insn(const void *addr)
3398{
3399 st_data_t key = (st_data_t)addr;
3400 st_data_t val;
3401
3402 if (st_lookup(encoded_insn_data, key, &val)) {
3403 insn_data_t *e = (insn_data_t *)val;
3404 return (int)e->insn;
3405 }
3406
3407 rb_bug("rb_vm_insn_addr2insn: invalid insn address: %p", addr);
3408}
3409
3410// Unlike rb_vm_insn_addr2insn, this function can return trace opcode variants.
3411int
3412rb_vm_insn_addr2opcode(const void *addr)
3413{
3414 st_data_t key = (st_data_t)addr;
3415 st_data_t val;
3416
3417 if (st_lookup(encoded_insn_data, key, &val)) {
3418 insn_data_t *e = (insn_data_t *)val;
3419 int opcode = e->insn;
3420 if (addr == e->trace_encoded_insn) {
3421 opcode += VM_INSTRUCTION_SIZE/2;
3422 }
3423 return opcode;
3424 }
3425
3426 rb_bug("rb_vm_insn_addr2opcode: invalid insn address: %p", addr);
3427}
3428
3429// Decode `ISEQ_BODY(iseq)->iseq_encoded[i]` to an insn.
3430int
3431rb_vm_insn_decode(const VALUE encoded)
3432{
3433#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
3434 int insn = rb_vm_insn_addr2insn((void *)encoded);
3435#else
3436 int insn = (int)encoded;
3437#endif
3438 return insn;
3439}
3440
3441static inline int
3442encoded_iseq_trace_instrument(VALUE *iseq_encoded_insn, rb_event_flag_t turnon, bool remain_current_trace)
3443{
3444 st_data_t key = (st_data_t)*iseq_encoded_insn;
3445 st_data_t val;
3446
3447 if (st_lookup(encoded_insn_data, key, &val)) {
3448 insn_data_t *e = (insn_data_t *)val;
3449 if (remain_current_trace && key == (st_data_t)e->trace_encoded_insn) {
3450 turnon = 1;
3451 }
3452 *iseq_encoded_insn = (VALUE) (turnon ? e->trace_encoded_insn : e->notrace_encoded_insn);
3453 return e->insn_len;
3454 }
3455
3456 rb_bug("trace_instrument: invalid insn address: %p", (void *)*iseq_encoded_insn);
3457}
3458
3459void
3460rb_iseq_trace_flag_cleared(const rb_iseq_t *iseq, size_t pos)
3461{
3462 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3463 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3464 encoded_iseq_trace_instrument(&iseq_encoded[pos], 0, false);
3465}
3466
3467// We need to fire call events on instructions with b_call events if the block
3468// is running as a method. So, if we are listening for call events, then
3469// instructions that have b_call events need to become trace variants.
3470// Use this function when making decisions about recompiling to trace variants.
3471static inline rb_event_flag_t
3472add_bmethod_events(rb_event_flag_t events)
3473{
3474 if (events & RUBY_EVENT_CALL) {
3475 events |= RUBY_EVENT_B_CALL;
3476 }
3477 if (events & RUBY_EVENT_RETURN) {
3478 events |= RUBY_EVENT_B_RETURN;
3479 }
3480 return events;
3481}
3482
3483// Note, to support call/return events for bmethods, turnon_event can have more events than tpval.
3484static int
3485iseq_add_local_tracepoint(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line)
3486{
3487 unsigned int pc;
3488 int n = 0;
3489 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3490 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3491
3492 VM_ASSERT(ISEQ_EXECUTABLE_P(iseq));
3493
3494 for (pc=0; pc<body->iseq_size;) {
3495 const struct iseq_insn_info_entry *entry = get_insn_info(iseq, pc);
3496 rb_event_flag_t pc_events = entry->events;
3497 rb_event_flag_t target_events = turnon_events;
3498 unsigned int line = (int)entry->line_no;
3499
3500 if (target_line == 0 || target_line == line) {
3501 /* ok */
3502 }
3503 else {
3504 target_events &= ~RUBY_EVENT_LINE;
3505 }
3506
3507 if (pc_events & target_events) {
3508 n++;
3509 }
3510 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (target_events | iseq->aux.exec.global_trace_events), true);
3511 }
3512
3513 if (n > 0) {
3514 if (iseq->aux.exec.local_hooks == NULL) {
3515 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = RB_ZALLOC(rb_hook_list_t);
3516 iseq->aux.exec.local_hooks->is_local = true;
3517 }
3518 rb_hook_list_connect_tracepoint((VALUE)iseq, iseq->aux.exec.local_hooks, tpval, target_line);
3519 }
3520
3521 return n;
3522}
3523
3525 rb_event_flag_t turnon_events;
3526 VALUE tpval;
3527 unsigned int target_line;
3528 int n;
3529};
3530
3531static void
3532iseq_add_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3533{
3535 data->n += iseq_add_local_tracepoint(iseq, data->turnon_events, data->tpval, data->target_line);
3536 iseq_iterate_children(iseq, iseq_add_local_tracepoint_i, p);
3537}
3538
3539int
3540rb_iseq_add_local_tracepoint_recursively(const rb_iseq_t *iseq, rb_event_flag_t turnon_events, VALUE tpval, unsigned int target_line, bool target_bmethod)
3541{
3543 if (target_bmethod) {
3544 turnon_events = add_bmethod_events(turnon_events);
3545 }
3546 data.turnon_events = turnon_events;
3547 data.tpval = tpval;
3548 data.target_line = target_line;
3549 data.n = 0;
3550
3551 iseq_add_local_tracepoint_i(iseq, (void *)&data);
3552 if (0) rb_funcall(Qnil, rb_intern("puts"), 1, rb_iseq_disasm(iseq)); /* for debug */
3553 return data.n;
3554}
3555
3556static int
3557iseq_remove_local_tracepoint(const rb_iseq_t *iseq, VALUE tpval)
3558{
3559 int n = 0;
3560
3561 if (iseq->aux.exec.local_hooks) {
3562 unsigned int pc;
3563 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3564 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3565 rb_event_flag_t local_events = 0;
3566
3567 rb_hook_list_remove_tracepoint(iseq->aux.exec.local_hooks, tpval);
3568 local_events = iseq->aux.exec.local_hooks->events;
3569
3570 if (local_events == 0) {
3571 rb_hook_list_free(iseq->aux.exec.local_hooks);
3572 ((rb_iseq_t *)iseq)->aux.exec.local_hooks = NULL;
3573 }
3574
3575 local_events = add_bmethod_events(local_events);
3576 for (pc = 0; pc<body->iseq_size;) {
3577 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
3578 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & (local_events | iseq->aux.exec.global_trace_events), false);
3579 }
3580 }
3581 return n;
3582}
3583
3585 VALUE tpval;
3586 int n;
3587};
3588
3589static void
3590iseq_remove_local_tracepoint_i(const rb_iseq_t *iseq, void *p)
3591{
3593 data->n += iseq_remove_local_tracepoint(iseq, data->tpval);
3594 iseq_iterate_children(iseq, iseq_remove_local_tracepoint_i, p);
3595}
3596
3597int
3598rb_iseq_remove_local_tracepoint_recursively(const rb_iseq_t *iseq, VALUE tpval)
3599{
3601 data.tpval = tpval;
3602 data.n = 0;
3603
3604 iseq_remove_local_tracepoint_i(iseq, (void *)&data);
3605 return data.n;
3606}
3607
3608void
3609rb_iseq_trace_set(const rb_iseq_t *iseq, rb_event_flag_t turnon_events)
3610{
3611 if (iseq->aux.exec.global_trace_events == turnon_events) {
3612 return;
3613 }
3614
3615 if (!ISEQ_EXECUTABLE_P(iseq)) {
3616 /* this is building ISeq */
3617 return;
3618 }
3619 else {
3620 unsigned int pc;
3621 const struct rb_iseq_constant_body *const body = ISEQ_BODY(iseq);
3622 VALUE *iseq_encoded = (VALUE *)body->iseq_encoded;
3623 rb_event_flag_t enabled_events;
3624 rb_event_flag_t local_events = iseq->aux.exec.local_hooks ? iseq->aux.exec.local_hooks->events : 0;
3625 ((rb_iseq_t *)iseq)->aux.exec.global_trace_events = turnon_events;
3626 enabled_events = add_bmethod_events(turnon_events | local_events);
3627
3628 for (pc=0; pc<body->iseq_size;) {
3629 rb_event_flag_t pc_events = rb_iseq_event_flags(iseq, pc);
3630 pc += encoded_iseq_trace_instrument(&iseq_encoded[pc], pc_events & enabled_events, true);
3631 }
3632 }
3633}
3634
3635bool rb_vm_call_ivar_attrset_p(const vm_call_handler ch);
3636void rb_vm_cc_general(const struct rb_callcache *cc);
3637
3638static int
3639clear_attr_ccs_i(void *vstart, void *vend, size_t stride, void *data)
3640{
3641 VALUE v = (VALUE)vstart;
3642 for (; v != (VALUE)vend; v += stride) {
3643 void *ptr = asan_poisoned_object_p(v);
3644 asan_unpoison_object(v, false);
3645
3646 if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) {
3647 rb_vm_cc_general((struct rb_callcache *)v);
3648 }
3649
3650 asan_poison_object_if(ptr, v);
3651 }
3652 return 0;
3653}
3654
3655void
3656rb_clear_attr_ccs(void)
3657{
3658 rb_objspace_each_objects(clear_attr_ccs_i, NULL);
3659}
3660
3661static int
3662trace_set_i(void *vstart, void *vend, size_t stride, void *data)
3663{
3664 rb_event_flag_t turnon_events = *(rb_event_flag_t *)data;
3665
3666 VALUE v = (VALUE)vstart;
3667 for (; v != (VALUE)vend; v += stride) {
3668 void *ptr = asan_poisoned_object_p(v);
3669 asan_unpoison_object(v, false);
3670
3671 if (rb_obj_is_iseq(v)) {
3672 rb_iseq_trace_set(rb_iseq_check((rb_iseq_t *)v), turnon_events);
3673 }
3674 else if (imemo_type_p(v, imemo_callcache) && rb_vm_call_ivar_attrset_p(((const struct rb_callcache *)v)->call_)) {
3675 rb_vm_cc_general((struct rb_callcache *)v);
3676 }
3677
3678 asan_poison_object_if(ptr, v);
3679 }
3680 return 0;
3681}
3682
3683void
3684rb_iseq_trace_set_all(rb_event_flag_t turnon_events)
3685{
3686 rb_objspace_each_objects(trace_set_i, &turnon_events);
3687}
3688
3689VALUE
3690rb_iseqw_local_variables(VALUE iseqval)
3691{
3692 return rb_iseq_local_variables(iseqw_check(iseqval));
3693}
3694
3695/*
3696 * call-seq:
3697 * iseq.to_binary(extra_data = nil) -> binary str
3698 *
3699 * Returns serialized iseq binary format data as a String object.
3700 * A corresponding iseq object is created by
3701 * RubyVM::InstructionSequence.load_from_binary() method.
3702 *
3703 * String extra_data will be saved with binary data.
3704 * You can access this data with
3705 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary).
3706 *
3707 * Note that the translated binary data is not portable.
3708 * You can not move this binary data to another machine.
3709 * You can not use the binary data which is created by another
3710 * version/another architecture of Ruby.
3711 */
3712static VALUE
3713iseqw_to_binary(int argc, VALUE *argv, VALUE self)
3714{
3715 VALUE opt = !rb_check_arity(argc, 0, 1) ? Qnil : argv[0];
3716 return rb_iseq_ibf_dump(iseqw_check(self), opt);
3717}
3718
3719/*
3720 * call-seq:
3721 * RubyVM::InstructionSequence.load_from_binary(binary) -> iseq
3722 *
3723 * Load an iseq object from binary format String object
3724 * created by RubyVM::InstructionSequence.to_binary.
3725 *
3726 * This loader does not have a verifier, so that loading broken/modified
3727 * binary causes critical problem.
3728 *
3729 * You should not load binary data provided by others.
3730 * You should use binary data translated by yourself.
3731 */
3732static VALUE
3733iseqw_s_load_from_binary(VALUE self, VALUE str)
3734{
3735 return iseqw_new(rb_iseq_ibf_load(str));
3736}
3737
3738/*
3739 * call-seq:
3740 * RubyVM::InstructionSequence.load_from_binary_extra_data(binary) -> str
3741 *
3742 * Load extra data embed into binary format String object.
3743 */
3744static VALUE
3745iseqw_s_load_from_binary_extra_data(VALUE self, VALUE str)
3746{
3747 return rb_iseq_ibf_load_extra_data(str);
3748}
3749
3750#if VM_INSN_INFO_TABLE_IMPL == 2
3751
3752/* An implementation of succinct bit-vector for insn_info table.
3753 *
3754 * A succinct bit-vector is a small and efficient data structure that provides
3755 * a bit-vector augmented with an index for O(1) rank operation:
3756 *
3757 * rank(bv, n): the number of 1's within a range from index 0 to index n
3758 *
3759 * This can be used to lookup insn_info table from PC.
3760 * For example, consider the following iseq and insn_info_table:
3761 *
3762 * iseq insn_info_table
3763 * PC insn+operand position lineno event
3764 * 0: insn1 0: 1 [Li]
3765 * 2: insn2 2: 2 [Li] <= (A)
3766 * 5: insn3 8: 3 [Li] <= (B)
3767 * 8: insn4
3768 *
 * In this case, a succinct bit-vector whose indexes 0, 2, and 8 are "1" and
 * whose other indexes are "0", i.e., "101000001", is created.
 * To lookup the lineno of insn2, calculate rank("101000001", 2) = 2, so
 * the line (A) is the entry in question.
 * To lookup the lineno of insn4, calculate rank("101000001", 8) = 3, so
 * the line (B) is the entry in question.
 *
 * A naive implementation of succinct bit-vector works really well
 * not only for large size but also for small size.  However, it has
 * tiny overhead for very small size.  So, this implementation consists
 * of two parts: one part is the "immediate" table that keeps rank result
 * as a raw table, and the other part is a normal succinct bit-vector.
3781 */
3782
3783#define IMMEDIATE_TABLE_SIZE 54 /* a multiple of 9, and < 128 */
3784
3785struct succ_index_table {
3786 uint64_t imm_part[IMMEDIATE_TABLE_SIZE / 9];
3787 struct succ_dict_block {
3788 unsigned int rank;
3789 uint64_t small_block_ranks; /* 9 bits * 7 = 63 bits */
3790 uint64_t bits[512/64];
3791 } succ_part[FLEX_ARY_LEN];
3792};
3793
3794#define imm_block_rank_set(v, i, r) (v) |= (uint64_t)(r) << (7 * (i))
3795#define imm_block_rank_get(v, i) (((int)((v) >> ((i) * 7))) & 0x7f)
3796#define small_block_rank_set(v, i, r) (v) |= (uint64_t)(r) << (9 * ((i) - 1))
3797#define small_block_rank_get(v, i) ((i) == 0 ? 0 : (((int)((v) >> (((i) - 1) * 9))) & 0x1ff))
3798
3799static struct succ_index_table *
3800succ_index_table_create(int max_pos, int *data, int size)
3801{
3802 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
3803 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
3804 struct succ_index_table *sd =
3805 rb_xcalloc_mul_add_mul(
3806 imm_size, sizeof(uint64_t),
3807 succ_size, sizeof(struct succ_dict_block));
3808 int i, j, k, r;
3809
3810 r = 0;
3811 for (j = 0; j < imm_size; j++) {
3812 for (i = 0; i < 9; i++) {
3813 if (r < size && data[r] == j * 9 + i) r++;
3814 imm_block_rank_set(sd->imm_part[j], i, r);
3815 }
3816 }
3817 for (k = 0; k < succ_size; k++) {
3818 struct succ_dict_block *sd_block = &sd->succ_part[k];
3819 int small_rank = 0;
3820 sd_block->rank = r;
3821 for (j = 0; j < 8; j++) {
3822 uint64_t bits = 0;
3823 if (j) small_block_rank_set(sd_block->small_block_ranks, j, small_rank);
3824 for (i = 0; i < 64; i++) {
3825 if (r < size && data[r] == k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE) {
3826 bits |= ((uint64_t)1) << i;
3827 r++;
3828 }
3829 }
3830 sd_block->bits[j] = bits;
3831 small_rank += rb_popcount64(bits);
3832 }
3833 }
3834 return sd;
3835}
3836
3837static unsigned int *
3838succ_index_table_invert(int max_pos, struct succ_index_table *sd, int size)
3839{
3840 const int imm_size = (max_pos < IMMEDIATE_TABLE_SIZE ? max_pos + 8 : IMMEDIATE_TABLE_SIZE) / 9;
3841 const int succ_size = (max_pos < IMMEDIATE_TABLE_SIZE ? 0 : (max_pos - IMMEDIATE_TABLE_SIZE + 511)) / 512;
3842 unsigned int *positions = ALLOC_N(unsigned int, size), *p;
3843 int i, j, k, r = -1;
3844 p = positions;
3845 for (j = 0; j < imm_size; j++) {
3846 for (i = 0; i < 9; i++) {
3847 int nr = imm_block_rank_get(sd->imm_part[j], i);
3848 if (r != nr) *p++ = j * 9 + i;
3849 r = nr;
3850 }
3851 }
3852 for (k = 0; k < succ_size; k++) {
3853 for (j = 0; j < 8; j++) {
3854 for (i = 0; i < 64; i++) {
3855 if (sd->succ_part[k].bits[j] & (((uint64_t)1) << i)) {
3856 *p++ = k * 512 + j * 64 + i + IMMEDIATE_TABLE_SIZE;
3857 }
3858 }
3859 }
3860 }
3861 return positions;
3862}
3863
3864static int
3865succ_index_lookup(const struct succ_index_table *sd, int x)
3866{
3867 if (x < IMMEDIATE_TABLE_SIZE) {
3868 const int i = x / 9;
3869 const int j = x % 9;
3870 return imm_block_rank_get(sd->imm_part[i], j);
3871 }
3872 else {
3873 const int block_index = (x - IMMEDIATE_TABLE_SIZE) / 512;
3874 const struct succ_dict_block *block = &sd->succ_part[block_index];
3875 const int block_bit_index = (x - IMMEDIATE_TABLE_SIZE) % 512;
3876 const int small_block_index = block_bit_index / 64;
3877 const int small_block_popcount = small_block_rank_get(block->small_block_ranks, small_block_index);
3878 const int popcnt = rb_popcount64(block->bits[small_block_index] << (63 - block_bit_index % 64));
3879
3880 return block->rank + small_block_popcount + popcnt;
3881 }
3882}
3883#endif
3884
3885
3886/*
3887 * call-seq:
3888 * iseq.script_lines -> array or nil
3889 *
3890 * It returns recorded script lines if it is availalble.
3891 * The script lines are not limited to the iseq range, but
3892 * are entire lines of the source file.
3893 *
3894 * Note that this is an API for ruby internal use, debugging,
3895 * and research. Do not use this for any other purpose.
3896 * The compatibility is not guaranteed.
3897 */
3898static VALUE
3899iseqw_script_lines(VALUE self)
3900{
3901 const rb_iseq_t *iseq = iseqw_check(self);
3902 return ISEQ_BODY(iseq)->variable.script_lines;
3903}
3904
3905/*
3906 * Document-class: RubyVM::InstructionSequence
3907 *
3908 * The InstructionSequence class represents a compiled sequence of
3909 * instructions for the Virtual Machine used in MRI. Not all implementations of Ruby
3910 * may implement this class, and for the implementations that implement it,
3911 * the methods defined and behavior of the methods can change in any version.
3912 *
3913 * With it, you can get a handle to the instructions that make up a method or
3914 * a proc, compile strings of Ruby code down to VM instructions, and
3915 * disassemble instruction sequences to strings for easy inspection. It is
3916 * mostly useful if you want to learn how YARV works, but it also lets
3917 * you control various settings for the Ruby iseq compiler.
3918 *
3919 * You can find the source for the VM instructions in +insns.def+ in the Ruby
3920 * source.
3921 *
3922 * The instruction sequence results will almost certainly change as Ruby
3923 * changes, so example output in this documentation may be different from what
3924 * you see.
3925 *
3926 * Of course, this class is MRI specific.
3927 */
3928
3929void
3930Init_ISeq(void)
3931{
3932 /* declare ::RubyVM::InstructionSequence */
3933 rb_cISeq = rb_define_class_under(rb_cRubyVM, "InstructionSequence", rb_cObject);
3934 rb_undef_alloc_func(rb_cISeq);
3935 rb_define_method(rb_cISeq, "inspect", iseqw_inspect, 0);
3936 rb_define_method(rb_cISeq, "disasm", iseqw_disasm, 0);
3937 rb_define_method(rb_cISeq, "disassemble", iseqw_disasm, 0);
3938 rb_define_method(rb_cISeq, "to_a", iseqw_to_a, 0);
3939 rb_define_method(rb_cISeq, "eval", iseqw_eval, 0);
3940
3941 rb_define_method(rb_cISeq, "to_binary", iseqw_to_binary, -1);
3942 rb_define_singleton_method(rb_cISeq, "load_from_binary", iseqw_s_load_from_binary, 1);
3943 rb_define_singleton_method(rb_cISeq, "load_from_binary_extra_data", iseqw_s_load_from_binary_extra_data, 1);
3944
3945 /* location APIs */
3946 rb_define_method(rb_cISeq, "path", iseqw_path, 0);
3947 rb_define_method(rb_cISeq, "absolute_path", iseqw_absolute_path, 0);
3948 rb_define_method(rb_cISeq, "label", iseqw_label, 0);
3949 rb_define_method(rb_cISeq, "base_label", iseqw_base_label, 0);
3950 rb_define_method(rb_cISeq, "first_lineno", iseqw_first_lineno, 0);
3951 rb_define_method(rb_cISeq, "trace_points", iseqw_trace_points, 0);
3952 rb_define_method(rb_cISeq, "each_child", iseqw_each_child, 0);
3953
3954#if 0 /* TBD */
3955 rb_define_private_method(rb_cISeq, "marshal_dump", iseqw_marshal_dump, 0);
3956 rb_define_private_method(rb_cISeq, "marshal_load", iseqw_marshal_load, 1);
3957 /* disable this feature because there is no verifier. */
3958 rb_define_singleton_method(rb_cISeq, "load", iseq_s_load, -1);
3959#endif
3960 (void)iseq_s_load;
3961
3962 rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
3963 rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
3964 rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
3965 rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
3966 rb_define_singleton_method(rb_cISeq, "compile_option=", iseqw_s_compile_option_set, 1);
3967 rb_define_singleton_method(rb_cISeq, "disasm", iseqw_s_disasm, 1);
3968 rb_define_singleton_method(rb_cISeq, "disassemble", iseqw_s_disasm, 1);
3969 rb_define_singleton_method(rb_cISeq, "of", iseqw_s_of, 1);
3970
3971 // script lines
3972 rb_define_method(rb_cISeq, "script_lines", iseqw_script_lines, 0);
3973
3974 rb_undef_method(CLASS_OF(rb_cISeq), "translate");
3975 rb_undef_method(CLASS_OF(rb_cISeq), "load_iseq");
3976}
#define RUBY_ASSERT(expr)
Asserts that the given expression is truthy if and only if RUBY_DEBUG is truthy.
Definition assert.h:177
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
#define rb_define_private_method(klass, mid, func, arity)
Defines klass#mid and makes it private.
#define RUBY_EVENT_END
Encountered an end of a class clause.
Definition event.h:36
#define RUBY_EVENT_C_CALL
A method, written in C, is called.
Definition event.h:39
#define RUBY_EVENT_B_RETURN
Encountered a next statement.
Definition event.h:52
#define RUBY_EVENT_CLASS
Encountered a new class.
Definition event.h:35
#define RUBY_EVENT_LINE
Encountered a new line.
Definition event.h:34
#define RUBY_EVENT_RETURN
Encountered a return statement.
Definition event.h:38
#define RUBY_EVENT_C_RETURN
Return from a method, written in C.
Definition event.h:40
#define RUBY_EVENT_B_CALL
Encountered a yield statement.
Definition event.h:51
uint32_t rb_event_flag_t
Represents event(s).
Definition event.h:103
#define RUBY_EVENT_CALL
A method, written in Ruby, is called.
Definition event.h:37
#define RB_OBJ_FREEZE
Just another name of rb_obj_freeze_inline.
Definition fl_type.h:94
VALUE rb_define_class_under(VALUE outer, const char *name, VALUE super)
Defines a class under the namespace of outer.
Definition class.c:920
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
Definition class.c:2073
#define rb_str_new2
Old name of rb_str_new_cstr.
Definition string.h:1675
#define T_FILE
Old name of RUBY_T_FILE.
Definition value_type.h:62
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
Definition long.h:48
#define rb_str_cat2
Old name of rb_str_cat_cstr.
Definition string.h:1683
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define SPECIAL_CONST_P
Old name of RB_SPECIAL_CONST_P.
#define ULONG2NUM
Old name of RB_ULONG2NUM.
Definition long.h:60
#define ZALLOC
Old name of RB_ZALLOC.
Definition memory.h:396
#define LL2NUM
Old name of RB_LL2NUM.
Definition long_long.h:30
#define CLASS_OF
Old name of rb_class_of.
Definition globals.h:203
#define T_NONE
Old name of RUBY_T_NONE.
Definition value_type.h:74
#define FIX2INT
Old name of RB_FIX2INT.
Definition int.h:41
#define T_HASH
Old name of RUBY_T_HASH.
Definition value_type.h:65
#define ALLOC_N
Old name of RB_ALLOC_N.
Definition memory.h:393
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
Definition fl_type.h:140
#define LONG2NUM
Old name of RB_LONG2NUM.
Definition long.h:50
#define Qtrue
Old name of RUBY_Qtrue.
#define NUM2INT
Old name of RB_NUM2INT.
Definition int.h:44
#define INT2NUM
Old name of RB_INT2NUM.
Definition int.h:43
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define T_ARRAY
Old name of RUBY_T_ARRAY.
Definition value_type.h:56
#define NIL_P
Old name of RB_NIL_P.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
Definition value_type.h:85
#define NUM2LONG
Old name of RB_NUM2LONG.
Definition long.h:51
#define UINT2NUM
Old name of RB_UINT2NUM.
Definition int.h:46
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
Definition symbol.h:47
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:651
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition error.c:3148
void rb_exc_raise(VALUE mesg)
Raises an exception in the current thread.
Definition eval.c:684
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition error.c:794
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1091
VALUE rb_eSyntaxError
SyntaxError exception.
Definition error.c:1108
VALUE rb_class_superclass(VALUE klass)
Queries the parent of the given class.
Definition object.c:1995
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:84
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
Definition object.c:190
VALUE rb_inspect(VALUE obj)
Generates a human-readable textual representation of the given object.
Definition object.c:600
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
Definition object.c:1182
#define RB_OBJ_WRITTEN(old, oldv, young)
Identical to RB_OBJ_WRITE(), except it doesn't write any values, but only a WB declaration.
Definition rgengc.h:232
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition rgengc.h:220
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_file_open_str(VALUE fname, const char *fmode)
Identical to rb_file_open(), except it takes the pathname as a Ruby's string instead of C's.
Definition io.c:7166
VALUE rb_io_close(VALUE io)
Closes the IO.
Definition io.c:5668
VALUE rb_obj_is_method(VALUE recv)
Queries if the given object is a method.
Definition proc.c:1637
VALUE rb_obj_is_proc(VALUE recv)
Queries if the given object is a proc.
Definition proc.c:175
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
Definition string.c:3323
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
Definition string.h:1498
#define rb_exc_new_cstr(exc, str)
Identical to rb_exc_new(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1670
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1834
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3149
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
Definition string.c:1840
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3019
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
Definition string.c:6677
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
Definition string.c:3603
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
Definition string.c:3423
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1656
VALUE rb_str_resize(VALUE str, long len)
Overwrites the length of the string.
Definition string.c:3036
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
Definition string.c:2445
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
Definition string.h:1514
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver is an instance of RString.
Definition symbol.c:844
VALUE rb_class_name(VALUE obj)
Queries the name of the given object's class.
Definition variable.c:307
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
Definition vm_method.c:2823
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
Definition vm_method.c:1159
VALUE rb_check_funcall(VALUE recv, ID mid, int argc, const VALUE *argv)
Identical to rb_funcallv(), except it returns RUBY_Qundef instead of raising rb_eNoMethodError.
Definition vm_eval.c:665
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1084
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
Definition symbol.c:942
const char * rb_id2name(ID id)
Retrieves the name mapped to the given id.
Definition symbol.c:959
VALUE rb_ractor_make_shareable(VALUE obj)
Destructively transforms the passed object so that multiple Ractors can share it.
Definition ractor.c:2522
#define RB_NUM2INT
Just another name of rb_num2int_inline.
Definition int.h:38
#define RB_INT2NUM
Just another name of rb_int2num_inline.
Definition int.h:37
VALUE rb_sprintf(const char *fmt,...)
Ruby's extended sprintf(3).
Definition sprintf.c:1219
VALUE rb_str_catf(VALUE dst, const char *fmt,...)
Identical to rb_sprintf(), except it renders the output to the specified object rather than creating ...
Definition sprintf.c:1242
VALUE rb_yield(VALUE val)
Yields the block.
Definition vm_eval.c:1358
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:161
#define RB_ZALLOC(type)
Shorthand of RB_ZALLOC_N with n=1.
Definition memory.h:243
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:68
#define RARRAY_AREF(a, i)
Definition rarray.h:583
#define DATA_PTR(obj)
Convenient getter macro.
Definition rdata.h:71
#define RHASH_SIZE(h)
Queries the size of the hash.
Definition rhash.h:82
#define StringValue(v)
Ensures that the parameter object is a String.
Definition rstring.h:72
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C stri...
Definition rstring.h:95
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
Definition rtypeddata.h:441
#define FilePathValue(v)
Ensures that the parameter object is a path.
Definition ruby.h:91
#define RTEST
This is an old name of RB_TEST.
Definition node.h:156
Definition iseq.h:263
Definition vm_core.h:281
Definition vm_core.h:276
Definition iseq.h:234
struct rb_iseq_constant_body::@132 param
parameter information
Definition st.h:79
Definition vm_core.h:285
intptr_t SIGNED_VALUE
A signed integer type that has the same width as VALUE.
Definition value.h:63
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52
ruby_value_type
C-level type of an object.
Definition value_type.h:112