/home/travis/build/MoarVM/MoarVM/src/core/bytecodedump.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "moar.h" |
2 | | |
3 | 0 | #define line_length 1024 |
4 | | MVM_FORMAT(printf, 4, 5) |
5 | | static void append_string(char **out, MVMuint32 *size, |
6 | 0 | MVMuint32 *length, char *str, ...) { |
7 | 0 | char string[line_length]; |
8 | 0 | MVMuint32 len; |
9 | 0 | va_list args; |
10 | 0 | va_start(args, str); |
11 | 0 |
|
12 | 0 | vsnprintf(string, line_length, str, args); |
13 | 0 | va_end(args); |
14 | 0 |
|
15 | 0 | len = strlen(string); |
16 | 0 | if (*length + len > *size) { |
17 | 0 | while (*length + len > *size) |
18 | 0 | *size = *size * 2; |
19 | 0 | *out = MVM_realloc(*out, *size); |
20 | 0 | } |
21 | 0 |
|
22 | 0 | memcpy(*out + *length, string, len); |
23 | 0 | *length = *length + len; |
24 | 0 | } |
25 | | |
26 | 0 | static const char * get_typename(MVMuint16 type) { |
27 | 0 | switch(type) { |
28 | 0 | case MVM_reg_int8 : return "int8"; |
29 | 0 | case MVM_reg_int16: return "int16"; |
30 | 0 | case MVM_reg_int32: return "int32"; |
31 | 0 | case MVM_reg_int64: return "int"; |
32 | 0 | case MVM_reg_num32: return "num32"; |
33 | 0 | case MVM_reg_num64: return "num"; |
34 | 0 | case MVM_reg_str : return "str"; |
35 | 0 | case MVM_reg_obj : return "obj"; |
36 | 0 | case MVM_reg_uint8 : return "uint8"; |
37 | 0 | case MVM_reg_uint16: return "uint16"; |
38 | 0 | case MVM_reg_uint32: return "uint32"; |
39 | 0 | case MVM_reg_uint64: return "uint"; |
40 | 0 | default : return "UNKNOWN"; |
41 | 0 | } |
42 | 0 | } |
43 | | |
/* Shorthand for appending printf-style text to the current output buffer.
 * Requires locals named `o` (buffer), `s` (capacity) and `l` (used length)
 * to be in scope at the point of use. */
#define a(...) append_string(&o,&s,&l, __VA_ARGS__)
/* Macros for getting things from the bytecode stream. */
/* GET_REG is defined differently here from interp.c */
/* Each accessor reads a value of the named type located `idx` bytes past
 * `pc`. The whole expansion is parenthesized so the result can be combined
 * safely with any surrounding operator. */
#define GET_I8(pc, idx)     (*((MVMint8 *)((pc) + (idx))))
#define GET_REG(pc, idx)    (*((MVMuint16 *)((pc) + (idx))))
#define GET_I16(pc, idx)    (*((MVMint16 *)((pc) + (idx))))
#define GET_UI16(pc, idx)   (*((MVMuint16 *)((pc) + (idx))))
#define GET_I32(pc, idx)    (*((MVMint32 *)((pc) + (idx))))
#define GET_UI32(pc, idx)   (*((MVMuint32 *)((pc) + (idx))))
#define GET_N32(pc, idx)    (*((MVMnum32 *)((pc) + (idx))))

/* Bit flags OR-ed into the per-byte `labels` array while a frame's
 * instructions are being decoded. */
enum {
    MVM_val_branch_target = 1, /* some instruction branches to this offset */
    MVM_val_op_boundary   = 2  /* an instruction starts at this offset */
};
59 | | |
60 | 0 | static MVMStaticFrame * get_frame(MVMThreadContext *tc, MVMCompUnit *cu, int idx) { |
61 | 0 | return ((MVMCode *)cu->body.coderefs[idx])->body.sf; |
62 | 0 | } |
63 | | |
64 | 0 | char * MVM_bytecode_dump(MVMThreadContext *tc, MVMCompUnit *cu) { |
65 | 0 | MVMuint32 s = 1024; |
66 | 0 | MVMuint32 l = 0; |
67 | 0 | MVMuint32 i, j, k; |
68 | 0 | char *o = MVM_calloc(s, sizeof(char)); |
69 | 0 | char ***frame_lexicals = MVM_malloc(sizeof(char **) * cu->body.num_frames); |
70 | 0 | MVMString *name = MVM_string_utf8_decode(tc, tc->instance->VMString, "", 0); |
71 | 0 |
|
72 | 0 | a("\nMoarVM dump of binary compilation unit:\n\n"); |
73 | 0 |
|
74 | 0 | for (k = 0; k < cu->body.num_scs; k++) { |
75 | 0 | char *tmpstr = MVM_string_utf8_encode_C_string( |
76 | 0 | tc, MVM_cu_string(tc, cu, cu->body.sc_handle_idxs[k])); |
77 | 0 | a(" SC_%u : %s\n", k, tmpstr); |
78 | 0 | MVM_free(tmpstr); |
79 | 0 | } |
80 | 0 |
|
81 | 0 | for (k = 0; k < cu->body.num_callsites; k++) { |
82 | 0 | MVMCallsite *callsite = cu->body.callsites[k]; |
83 | 0 | MVMuint16 arg_count = callsite->arg_count; |
84 | 0 | MVMuint16 nameds_count = 0; |
85 | 0 |
|
86 | 0 | a(" Callsite_%u :\n", k); |
87 | 0 | a(" num_pos: %d\n", callsite->num_pos); |
88 | 0 | a(" arg_count: %u\n", arg_count); |
89 | 0 | for (j = 0, i = 0; j < arg_count; j++) { |
90 | 0 | MVMCallsiteEntry csitee = callsite->arg_flags[i++]; |
91 | 0 | a(" Arg %u :", i); |
92 | 0 | if (csitee & MVM_CALLSITE_ARG_NAMED) { |
93 | 0 | if (callsite->arg_names) { |
94 | 0 | char *arg_name = MVM_string_utf8_encode_C_string(tc, |
95 | 0 | callsite->arg_names[nameds_count++]); |
96 | 0 | a(" named(%s)", arg_name); |
97 | 0 | MVM_free(arg_name); |
98 | 0 | } |
99 | 0 | else { |
100 | 0 | a(" named"); |
101 | 0 | } |
102 | 0 | j++; |
103 | 0 | } |
104 | 0 | else if (csitee & MVM_CALLSITE_ARG_FLAT_NAMED) { |
105 | 0 | a(" flatnamed"); |
106 | 0 | } |
107 | 0 | else if (csitee & MVM_CALLSITE_ARG_FLAT) { |
108 | 0 | a(" flat"); |
109 | 0 | } |
110 | 0 | else a(" positional"); |
111 | 0 | if (csitee & MVM_CALLSITE_ARG_OBJ) a(" obj"); |
112 | 0 | else if (csitee & MVM_CALLSITE_ARG_INT) a(" int"); |
113 | 0 | else if (csitee & MVM_CALLSITE_ARG_NUM) a(" num"); |
114 | 0 | else if (csitee & MVM_CALLSITE_ARG_STR) a(" str"); |
115 | 0 | if (csitee & MVM_CALLSITE_ARG_FLAT) a(" flat"); |
116 | 0 | a("\n"); |
117 | 0 | } |
118 | 0 | } |
119 | 0 | for (k = 0; k < cu->body.num_frames; k++) |
120 | 0 | MVM_bytecode_finish_frame(tc, cu, get_frame(tc, cu, k), 1); |
121 | 0 |
|
122 | 0 | for (k = 0; k < cu->body.num_frames; k++) { |
123 | 0 | MVMStaticFrame *frame = get_frame(tc, cu, k); |
124 | 0 | MVMLexicalRegistry *current, *tmp; |
125 | 0 | unsigned bucket_tmp; |
126 | 0 | char **lexicals; |
127 | 0 |
|
128 | 0 | if (!frame->body.fully_deserialized) { |
129 | 0 | MVM_bytecode_finish_frame(tc, cu, frame, 1); |
130 | 0 | } |
131 | 0 |
|
132 | 0 | lexicals = (char **)MVM_malloc(sizeof(char *) * frame->body.num_lexicals); |
133 | 0 | frame_lexicals[k] = lexicals; |
134 | 0 |
|
135 | 0 | HASH_ITER(hash_handle, frame->body.lexical_names, current, tmp, bucket_tmp) { |
136 | 0 | name->body.storage.blob_32 = (MVMint32 *)current->hash_handle.key; |
137 | 0 | name->body.num_graphs = (MVMuint32)current->hash_handle.keylen / sizeof(MVMGrapheme32); |
138 | 0 | lexicals[current->value] = MVM_string_utf8_encode_C_string(tc, name); |
139 | 0 | } |
140 | 0 | } |
141 | 0 | for (k = 0; k < cu->body.num_frames; k++) { |
142 | 0 | MVMStaticFrame *frame = get_frame(tc, cu, k); |
143 | 0 | char *cuuid; |
144 | 0 | char *fname; |
145 | 0 | cuuid = MVM_string_utf8_encode_C_string(tc, frame->body.cuuid); |
146 | 0 | fname = MVM_string_utf8_encode_C_string(tc, frame->body.name); |
147 | 0 | a(" Frame_%u :\n", k); |
148 | 0 | a(" cuuid : %s\n", cuuid); |
149 | 0 | MVM_free(cuuid); |
150 | 0 | a(" name : %s\n", fname); |
151 | 0 | MVM_free(fname); |
152 | 0 | for (j = 0; j < cu->body.num_frames; j++) { |
153 | 0 | if (get_frame(tc, cu, j) == frame->body.outer) |
154 | 0 | a(" outer : Frame_%u\n", j); |
155 | 0 | } |
156 | 0 |
|
157 | 0 | for (j = 0; j < frame->body.num_locals; j++) { |
158 | 0 | if (!j) |
159 | 0 | a(" Locals :\n"); |
160 | 0 | a(" %6u: loc_%u_%s\n", j, j, get_typename(frame->body.local_types[j])); |
161 | 0 | } |
162 | 0 |
|
163 | 0 | for (j = 0; j < frame->body.num_lexicals; j++) { |
164 | 0 | if (!j) |
165 | 0 | a(" Lexicals :\n"); |
166 | 0 | a(" %6u: lex_Frame_%u_%s_%s\n", j, k, frame_lexicals[k][j], get_typename(frame->body.lexical_types[j])); |
167 | 0 | } |
168 | 0 | a(" Instructions :\n"); |
169 | 0 | { |
170 | 0 |
|
171 | 0 | /* mostly stolen from validation.c */ |
172 | 0 | MVMStaticFrame *static_frame = frame; |
173 | 0 | MVMuint32 bytecode_size = static_frame->body.bytecode_size; |
174 | 0 | MVMuint8 *bytecode_start = static_frame->body.bytecode; |
175 | 0 | MVMuint8 *bytecode_end = bytecode_start + bytecode_size; |
176 | 0 | /* current position in the bytestream */ |
177 | 0 | MVMuint8 *cur_op = bytecode_start; |
178 | 0 | /* positions in the bytestream that are starts of ops and goto targets */ |
179 | 0 | MVMuint8 *labels = MVM_calloc(1, bytecode_size); |
180 | 0 | MVMuint32 *jumps = MVM_calloc(1, sizeof(MVMuint32) * bytecode_size); |
181 | 0 | char **lines = MVM_malloc(sizeof(char *) * bytecode_size); |
182 | 0 | MVMuint32 *linelocs = MVM_malloc(sizeof(MVMuint32) * bytecode_size); |
183 | 0 | MVMuint32 lineno = 0; |
184 | 0 | MVMuint32 lineloc; |
185 | 0 | MVMuint16 op_num; |
186 | 0 | const MVMOpInfo *op_info; |
187 | 0 | MVMuint32 operand_size = 0; |
188 | 0 | unsigned char op_rw; |
189 | 0 | unsigned char op_type; |
190 | 0 | unsigned char op_flags; |
191 | 0 | MVMOpInfo tmp_extop_info; |
192 | 0 | /* stash the outer output buffer */ |
193 | 0 | MVMuint32 sP = s; |
194 | 0 | MVMuint32 lP = l; |
195 | 0 | char *oP = o; |
196 | 0 | char *tmpstr; |
197 | 0 | while (cur_op < bytecode_end - 1) { |
198 | 0 |
|
199 | 0 | /* allocate a line buffer */ |
200 | 0 | s = 200; |
201 | 0 | l = 0; |
202 | 0 | o = MVM_calloc(s, sizeof(char)); |
203 | 0 |
|
204 | 0 | lineloc = cur_op - bytecode_start; |
205 | 0 | /* mark that this line starts at this point in the bytestream */ |
206 | 0 | linelocs[lineno] = lineloc; |
207 | 0 | /* mark that this point in the bytestream is an op boundary */ |
208 | 0 | labels[lineloc] |= MVM_val_op_boundary; |
209 | 0 |
|
210 | 0 | op_num = *((MVMint16 *)cur_op); |
211 | 0 | cur_op += 2; |
212 | 0 | if (op_num < MVM_OP_EXT_BASE) { |
213 | 0 | op_info = MVM_op_get_op(op_num); |
214 | 0 | a("%-12s ", op_info->name); |
215 | 0 | } |
216 | 0 | else { |
217 | 0 | MVMint16 ext_op_num = op_num - MVM_OP_EXT_BASE; |
218 | 0 | if (ext_op_num < cu->body.num_extops) { |
219 | 0 | MVMExtOpRecord r = cu->body.extops[ext_op_num]; |
220 | 0 | MVMuint8 j; |
221 | 0 | memset(&tmp_extop_info, 0, sizeof(MVMOpInfo)); |
222 | 0 | tmp_extop_info.name = MVM_string_utf8_encode_C_string(tc, r.name); |
223 | 0 | memcpy(tmp_extop_info.operands, r.operand_descriptor, 8); |
224 | 0 | for (j = 0; j < 8; j++) |
225 | 0 | if (tmp_extop_info.operands[j]) |
226 | 0 | tmp_extop_info.num_operands++; |
227 | 0 | else |
228 | 0 | break; |
229 | 0 | op_info = &tmp_extop_info; |
230 | 0 | a("%-12s ", tmp_extop_info.name); |
231 | 0 | MVM_free((void *)tmp_extop_info.name); |
232 | 0 | tmp_extop_info.name = NULL; |
233 | 0 | } |
234 | 0 | else { |
235 | 0 | MVM_exception_throw_adhoc(tc, "Extension op %d out of range", (int)op_num); |
236 | 0 | } |
237 | 0 | } |
238 | 0 |
|
239 | 0 | for (i = 0; i < op_info->num_operands; i++) { |
240 | 0 | if (i) a(", "); |
241 | 0 | op_flags = op_info->operands[i]; |
242 | 0 | op_rw = op_flags & MVM_operand_rw_mask; |
243 | 0 | op_type = op_flags & MVM_operand_type_mask; |
244 | 0 |
|
245 | 0 | if (op_rw == MVM_operand_literal) { |
246 | 0 | switch (op_type) { |
247 | 0 | case MVM_operand_int8: |
248 | 0 | operand_size = 1; |
249 | 0 | a("%"PRId8, GET_I8(cur_op, 0)); |
250 | 0 | break; |
251 | 0 | case MVM_operand_int16: |
252 | 0 | operand_size = 2; |
253 | 0 | a("%"PRId16, GET_I16(cur_op, 0)); |
254 | 0 | break; |
255 | 0 | case MVM_operand_int32: |
256 | 0 | operand_size = 4; |
257 | 0 | a("%"PRId32, GET_I32(cur_op, 0)); |
258 | 0 | break; |
259 | 0 | case MVM_operand_int64: |
260 | 0 | operand_size = 8; |
261 | 0 | a("%"PRId64, MVM_BC_get_I64(cur_op, 0)); |
262 | 0 | break; |
263 | 0 | case MVM_operand_num32: |
264 | 0 | operand_size = 4; |
265 | 0 | a("%f", GET_N32(cur_op, 0)); |
266 | 0 | break; |
267 | 0 | case MVM_operand_num64: |
268 | 0 | operand_size = 8; |
269 | 0 | a("%f", MVM_BC_get_N64(cur_op, 0)); |
270 | 0 | break; |
271 | 0 | case MVM_operand_callsite: |
272 | 0 | operand_size = 2; |
273 | 0 | a("Callsite_%"PRIu16, GET_UI16(cur_op, 0)); |
274 | 0 | break; |
275 | 0 | case MVM_operand_coderef: |
276 | 0 | operand_size = 2; |
277 | 0 | a("Frame_%"PRIu16, GET_UI16(cur_op, 0)); |
278 | 0 | break; |
279 | 0 | case MVM_operand_str: |
280 | 0 | operand_size = 4; |
281 | 0 | tmpstr = MVM_string_utf8_encode_C_string( |
282 | 0 | tc, MVM_cu_string(tc, cu, GET_UI32(cur_op, 0))); |
283 | 0 | /* XXX C-string-literal escape the \ and ' |
284 | 0 | and line breaks and non-ascii someday */ |
285 | 0 | a("'%s'", tmpstr); |
286 | 0 | MVM_free(tmpstr); |
287 | 0 | break; |
288 | 0 | case MVM_operand_ins: |
289 | 0 | operand_size = 4; |
290 | 0 | /* luckily all the ins operands are at the end |
291 | 0 | of op operands, so I can wait to resolve the label |
292 | 0 | to the end. */ |
293 | 0 | labels[GET_UI32(cur_op, 0)] |= MVM_val_branch_target; |
294 | 0 | jumps[lineno] = GET_UI32(cur_op, 0); |
295 | 0 | break; |
296 | 0 | case MVM_operand_obj: |
297 | 0 | /* not sure what a literal object is */ |
298 | 0 | operand_size = 4; |
299 | 0 | break; |
300 | 0 | default: |
301 | 0 | abort(); /* never reached, silence compiler warnings */ |
302 | 0 | } |
303 | 0 | } |
304 | 0 | else if (op_rw == MVM_operand_read_reg || op_rw == MVM_operand_write_reg) { |
305 | 0 | /* register operand */ |
306 | 0 | operand_size = 2; |
307 | 0 | a("loc_%u_%s", GET_REG(cur_op, 0), |
308 | 0 | get_typename(frame->body.local_types[GET_REG(cur_op, 0)])); |
309 | 0 | } |
310 | 0 | else if (op_rw == MVM_operand_read_lex || op_rw == MVM_operand_write_lex) { |
311 | 0 | /* lexical operand */ |
312 | 0 | MVMuint16 idx, frames, m; |
313 | 0 | MVMStaticFrame *applicable_frame = static_frame; |
314 | 0 |
|
315 | 0 | operand_size = 4; |
316 | 0 | idx = GET_UI16(cur_op, 0); |
317 | 0 | frames = GET_UI16(cur_op, 2); |
318 | 0 |
|
319 | 0 | m = frames; |
320 | 0 | while (m > 0) { |
321 | 0 | applicable_frame = applicable_frame->body.outer; |
322 | 0 | m--; |
323 | 0 | } |
324 | 0 | /* inefficient, I know. should use a hash. */ |
325 | 0 | for (m = 0; m < cu->body.num_frames; m++) { |
326 | 0 | if (get_frame(tc, cu, m) == applicable_frame) { |
327 | 0 | a("lex_Frame_%u_%s_%s", m, frame_lexicals[m][idx], |
328 | 0 | get_typename(applicable_frame->body.lexical_types[idx])); |
329 | 0 | } |
330 | 0 | } |
331 | 0 | } |
332 | 0 | cur_op += operand_size; |
333 | 0 | } |
334 | 0 | lines[lineno++] = o; |
335 | 0 | } |
336 | 0 | { |
337 | 0 | MVMuint32 *linelabels = MVM_calloc(lineno, sizeof(MVMuint32)); |
338 | 0 | MVMuint32 byte_offset = 0; |
339 | 0 | MVMuint32 line_number = 0; |
340 | 0 | MVMuint32 label_number = 1; |
341 | 0 | MVMuint32 *annotations = MVM_calloc(lineno, sizeof(MVMuint32)); |
342 | 0 |
|
343 | 0 | for (; byte_offset < bytecode_size; byte_offset++) { |
344 | 0 | if (labels[byte_offset] & MVM_val_branch_target) { |
345 | 0 | /* found a byte_offset where a label should be. |
346 | 0 | now crawl up through the lines to find which line starts there */ |
347 | 0 | while (linelocs[line_number] != byte_offset) line_number++; |
348 | 0 | linelabels[line_number] = label_number++; |
349 | 0 | } |
350 | 0 | } |
351 | 0 | o = oP; |
352 | 0 | l = lP; |
353 | 0 | s = sP; |
354 | 0 |
|
355 | 0 | i = 0; |
356 | 0 | /* resolve annotation line numbers */ |
357 | 0 | for (j = 0; j < frame->body.num_annotations; j++) { |
358 | 0 | MVMuint32 ann_offset = GET_UI32(frame->body.annotations_data, j*12); |
359 | 0 | for (; i < lineno; i++) { |
360 | 0 | if (linelocs[i] == ann_offset) { |
361 | 0 | annotations[i] = j + 1; |
362 | 0 | break; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | 0 |
|
367 | 0 | for (j = 0; j < lineno; j++) { |
368 | 0 | if (annotations[j]) { |
369 | 0 | MVMuint16 shi = GET_UI16(frame->body.annotations_data + 4, (annotations[j] - 1)*12); |
370 | 0 | tmpstr = MVM_string_utf8_encode_C_string( |
371 | 0 | tc, MVM_cu_string(tc, cu, shi < cu->body.num_strings ? shi : 0)); |
372 | 0 | a(" annotation: %s:%u\n", tmpstr, GET_UI32(frame->body.annotations_data, (annotations[j] - 1)*12 + 8)); |
373 | 0 | MVM_free(tmpstr); |
374 | 0 | } |
375 | 0 | if (linelabels[j]) |
376 | 0 | a(" label_%u:\n", linelabels[j]); |
377 | 0 | a("%05d %s", j, lines[j]); |
378 | 0 | MVM_free(lines[j]); |
379 | 0 | if (jumps[j]) { |
380 | 0 | /* horribly inefficient for large frames. again, should use a hash */ |
381 | 0 | line_number = 0; |
382 | 0 | while (linelocs[line_number] != jumps[j]) line_number++; |
383 | 0 | a("label_%u(%05u)", linelabels[line_number], line_number); |
384 | 0 | } |
385 | 0 | a("\n"); |
386 | 0 | } |
387 | 0 | MVM_free(lines); |
388 | 0 | MVM_free(jumps); |
389 | 0 | MVM_free(linelocs); |
390 | 0 | MVM_free(linelabels); |
391 | 0 | MVM_free(labels); |
392 | 0 | MVM_free(annotations); |
393 | 0 | } |
394 | 0 |
|
395 | 0 | } |
396 | 0 | } |
397 | 0 |
|
398 | 0 | o[l] = '\0'; |
399 | 0 |
|
400 | 0 | for (k = 0; k < cu->body.num_frames; k++) { |
401 | 0 | for (j = 0; j < get_frame(tc, cu, k)->body.num_lexicals; j++) { |
402 | 0 | MVM_free(frame_lexicals[k][j]); |
403 | 0 | } |
404 | 0 | MVM_free(frame_lexicals[k]); |
405 | 0 | } |
406 | 0 | MVM_free(frame_lexicals); |
407 | 0 | return o; |
408 | 0 | } |