/home/travis/build/MoarVM/MoarVM/src/spesh/graph.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "moar.h" |
2 | | |
3 | | /* This is where the spesh stuff all begins. The logic in here takes bytecode |
4 | | * and builds a spesh graph from it. This is a CFG in SSA form. Transforming |
5 | | * to SSA involves computing dominance frontiers, done by the algorithm found |
6 | | * in http://www.cs.rice.edu/~keith/EMBED/dom.pdf. The SSA algorithm itself is |
7 | | * from http://www.cs.utexas.edu/~pingali/CS380C/2010/papers/ssaCytron.pdf. */ |
8 | | |
9 | 0 | #define GET_I8(pc, idx) *((MVMint8 *)((pc) + (idx))) |
10 | | #define GET_UI8(pc, idx) *((MVMuint8 *)((pc) + (idx))) |
11 | 412k | #define GET_I16(pc, idx) *((MVMint16 *)((pc) + (idx))) |
12 | 2.09M | #define GET_UI16(pc, idx) *((MVMuint16 *)((pc) + (idx))) |
13 | 22 | #define GET_I32(pc, idx) *((MVMint32 *)((pc) + (idx))) |
14 | 307k | #define GET_UI32(pc, idx) *((MVMuint32 *)((pc) + (idx))) |
15 | 0 | #define GET_N32(pc, idx) *((MVMnum32 *)((pc) + (idx))) |
16 | | |
17 | | /* Allocate a piece of memory from the spesh graph's region |
18 | | * allocator. Deallocated when the spesh graph is. */ |
19 | 19.7M | void * MVM_spesh_alloc(MVMThreadContext *tc, MVMSpeshGraph *g, size_t bytes) { |
20 | 19.7M | return MVM_region_alloc(tc, &g->region_alloc, bytes); |
21 | 19.7M | } |
22 | | |
23 | | /* Looks up op info; doesn't sanity check, since we should be working on code |
24 | | * that already pass validation. */ |
25 | 1.32M | static const MVMOpInfo * get_op_info(MVMThreadContext *tc, MVMCompUnit *cu, MVMuint16 opcode) { |
26 | 1.32M | if (opcode < MVM_OP_EXT_BASE) { |
27 | 1.32M | return MVM_op_get_op(opcode); |
28 | 1.32M | } |
29 | 0 | else { |
30 | 0 | MVMuint16 index = opcode - MVM_OP_EXT_BASE; |
31 | 0 | MVMExtOpRecord *record = &cu->body.extops[index]; |
32 | 0 | return MVM_ext_resolve_extop_record(tc, record); |
33 | 0 | } |
34 | 1.32M | } |
35 | | |
36 | | /* Grows the spesh graph's deopt table if it is already full, so that we have |
37 | | * space for 1 more entry. */ |
38 | 277k | void MVM_spesh_graph_grow_deopt_table(MVMThreadContext *tc, MVMSpeshGraph *g) { |
39 | 277k | if (g->num_deopt_addrs == g->alloc_deopt_addrs) { |
40 | 40.2k | g->alloc_deopt_addrs += 8; |
41 | 40.2k | if (g->deopt_addrs) |
42 | 29.2k | g->deopt_addrs = MVM_realloc(g->deopt_addrs, |
43 | 29.2k | g->alloc_deopt_addrs * sizeof(MVMint32) * 2); |
44 | 40.2k | else |
45 | 11.0k | g->deopt_addrs = MVM_malloc(g->alloc_deopt_addrs * sizeof(MVMint32) * 2); |
46 | 40.2k | } |
47 | 277k | } |
48 | | |
49 | | /* Records a de-optimization annotation and mapping pair. */ |
50 | | void MVM_spesh_graph_add_deopt_annotation(MVMThreadContext *tc, MVMSpeshGraph *g, |
51 | | MVMSpeshIns *ins_node, MVMuint32 deopt_target, |
52 | 277k | MVMint32 type) { |
53 | 277k | /* Add an annotations. */ |
54 | 277k | MVMSpeshAnn *ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
55 | 277k | ann->type = type; |
56 | 277k | ann->data.deopt_idx = g->num_deopt_addrs; |
57 | 277k | ann->next = ins_node->annotations; |
58 | 277k | ins_node->annotations = ann; |
59 | 277k | |
60 | 277k | /* Record PC in the deopt entries table. */ |
61 | 277k | MVM_spesh_graph_grow_deopt_table(tc, g); |
62 | 277k | g->deopt_addrs[2 * g->num_deopt_addrs] = deopt_target; |
63 | 277k | g->num_deopt_addrs++; |
64 | 277k | } |
65 | | |
66 | | /* Records the current bytecode position as a logged annotation. Used for |
67 | | * resolving logged values. */ |
68 | | static void add_logged_annotation(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshIns *ins_node, |
69 | 86.2k | MVMuint8 *pc) { |
70 | 86.2k | MVMSpeshAnn *ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
71 | 86.2k | ann->type = MVM_SPESH_ANN_LOGGED; |
72 | 86.2k | ann->data.bytecode_offset = pc - g->bytecode; |
73 | 86.2k | ann->next = ins_node->annotations; |
74 | 86.2k | ins_node->annotations = ann; |
75 | 86.2k | } |
76 | | |
77 | | /* Finds the linearly previous basic block (not cheap, but uncommon). */ |
78 | 15.2k | MVMSpeshBB * MVM_spesh_graph_linear_prev(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *search) { |
79 | 15.2k | MVMSpeshBB *bb = g->entry; |
80 | 596k | while (bb) { |
81 | 596k | if (bb->linear_next == search) |
82 | 15.2k | return bb; |
83 | 581k | bb = bb->linear_next; |
84 | 581k | } |
85 | 0 | return NULL; |
86 | 15.2k | } |
87 | | |
88 | | /* Checks if a handler is a catch handler or a control handler. */ |
89 | 30.6k | static MVMint32 is_catch_handler(MVMThreadContext *tc, MVMSpeshGraph *g, MVMint32 handler_idx) { |
90 | 30.6k | return g->handlers[handler_idx].category_mask & MVM_EX_CAT_CATCH; |
91 | 30.6k | } |
92 | | |
93 | | /* Checks if a basic block already has a particular successor. */ |
94 | 242k | static MVMint32 already_succs(MVMThreadContext *tc, MVMSpeshBB *bb, MVMSpeshBB *succ) { |
95 | 242k | MVMint32 i = 0; |
96 | 896k | for (i = 0; i < bb->num_succ; i++) |
97 | 657k | if (bb->succ[i] == succ) |
98 | 4.08k | return 1; |
99 | 238k | return 0; |
100 | 242k | } |
101 | | |
102 | | /* Builds the control flow graph, populating the passed spesh graph structure |
103 | | * with it. This also makes nodes for all of the instruction. */ |
104 | 2.80M | #define MVM_CFG_BB_START 1 |
105 | 1.82M | #define MVM_CFG_BB_END 2 |
106 | 482k | #define MVM_CFG_BB_JUMPLIST 4 |
107 | | static void build_cfg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMStaticFrame *sf, |
108 | 21.1k | MVMint32 *existing_deopts, MVMint32 num_existing_deopts) { |
109 | 21.1k | MVMSpeshBB *cur_bb, *prev_bb; |
110 | 21.1k | MVMSpeshIns *last_ins; |
111 | 21.1k | MVMint64 i; |
112 | 21.1k | MVMint32 bb_idx; |
113 | 21.1k | |
114 | 21.1k | /* Temporary array of all MVMSpeshIns we create (one per instruction). |
115 | 21.1k | * Overestimate at size. Has the flat view, matching the bytecode. */ |
116 | 21.1k | MVMSpeshIns **ins_flat = MVM_calloc(g->bytecode_size / 2, sizeof(MVMSpeshIns *)); |
117 | 21.1k | |
118 | 21.1k | /* Temporary array where each byte in the input bytecode gets a 32-bit |
119 | 21.1k | * integer. This is used for two things: |
120 | 21.1k | * A) When we make the MVMSpeshIns for an instruction starting at the |
121 | 21.1k | * byte, we put the instruction index (into ins_flat) in the slot, |
122 | 21.1k | * shifting it by 3 bits to the left. We will use this to do fixups. |
123 | 21.1k | * B) The first bit is "I have an incoming branch" - that is, start of |
124 | 21.1k | * a basic block. The second bit is "I can branch" - that is, end of |
125 | 21.1k | * a basic block. It's possible to have both bits set. If it's part |
126 | 21.1k | * of a jumplist, it gets the third bit set also. |
127 | 21.1k | * Anything that's just a zero has no instruction starting there. */ |
128 | 21.1k | MVMuint32 *byte_to_ins_flags = MVM_calloc(g->bytecode_size, sizeof(MVMuint32)); |
129 | 21.1k | |
130 | 21.1k | /* Instruction to basic block mapping. Initialized later. */ |
131 | 21.1k | MVMSpeshBB **ins_to_bb = NULL; |
132 | 21.1k | |
133 | 21.1k | /* Which handlers are active; used for placing edges from blocks covered |
134 | 21.1k | * by exception handlers. */ |
135 | 21.1k | MVMuint8 *active_handlers = MVM_calloc(1, g->num_handlers); |
136 | 21.1k | MVMint32 num_active_handlers = 0; |
137 | 21.1k | |
138 | 21.1k | /* Make first pass through the bytecode. In this pass, we make MVMSpeshIns |
139 | 21.1k | * nodes for each instruction and set the start/end of block bits. Also |
140 | 21.1k | * set handler targets as basic block starters. */ |
141 | 21.1k | MVMCompUnit *cu = sf->body.cu; |
142 | 21.1k | MVMuint8 *pc = g->bytecode; |
143 | 21.1k | MVMuint8 *end = g->bytecode + g->bytecode_size; |
144 | 21.1k | MVMuint32 ins_idx = 0; |
145 | 21.1k | MVMuint8 next_bbs = 1; /* Next iteration (here, first) starts a BB. */ |
146 | 21.1k | MVMuint32 lineno_ann_offs = 0; |
147 | 21.1k | MVMuint32 num_osr_points = 0; |
148 | 21.1k | |
149 | 21.1k | MVMBytecodeAnnotation *ann_ptr = MVM_bytecode_resolve_annotation(tc, &sf->body, sf->body.bytecode - pc); |
150 | 21.1k | |
151 | 31.4k | for (i = 0; i < g->num_handlers; i++) { |
152 | 10.2k | if (g->handlers[i].start_offset != -1 && g->handlers[i].goto_offset != -1) { |
153 | 10.2k | byte_to_ins_flags[g->handlers[i].start_offset] |= MVM_CFG_BB_START; |
154 | 10.2k | byte_to_ins_flags[g->handlers[i].end_offset] |= MVM_CFG_BB_START; |
155 | 10.2k | byte_to_ins_flags[g->handlers[i].goto_offset] |= MVM_CFG_BB_START; |
156 | 10.2k | } |
157 | 10.2k | } |
158 | 1.32M | while (pc < end) { |
159 | 1.30M | /* Look up op info. */ |
160 | 1.30M | MVMuint16 opcode = *(MVMuint16 *)pc; |
161 | 1.30M | MVMuint8 *args = pc + 2; |
162 | 1.30M | MVMuint8 arg_size = 0; |
163 | 1.30M | const MVMOpInfo *info = get_op_info(tc, cu, opcode); |
164 | 1.30M | |
165 | 1.30M | /* Create an instruction node, add it, and record its position. */ |
166 | 1.30M | MVMSpeshIns *ins_node = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); |
167 | 1.30M | ins_flat[ins_idx] = ins_node; |
168 | 1.30M | byte_to_ins_flags[pc - g->bytecode] |= ins_idx << 3; |
169 | 1.30M | |
170 | 1.30M | /* Did previous instruction end a basic block? */ |
171 | 1.30M | if (next_bbs) { |
172 | 403k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_START; |
173 | 403k | next_bbs = 0; |
174 | 403k | } |
175 | 1.30M | |
176 | 1.30M | /* Also check we're not already a BB start due to being a branch |
177 | 1.30M | * target, in which case we should ensure our prior is marked as |
178 | 1.30M | * a BB end. */ |
179 | 903k | else { |
180 | 903k | if (byte_to_ins_flags[pc - g->bytecode] & MVM_CFG_BB_START) { |
181 | 69.2k | MVMuint32 hunt = pc - g->bytecode; |
182 | 459k | while (!byte_to_ins_flags[--hunt]); |
183 | 69.2k | byte_to_ins_flags[hunt] |= MVM_CFG_BB_END; |
184 | 69.2k | } |
185 | 903k | } |
186 | 1.30M | |
187 | 1.30M | /* Store opcode. */ |
188 | 1.30M | ins_node->info = info; |
189 | 1.30M | |
190 | 1.30M | /* If this is a pre-instruction deopt point opcode, annotate. */ |
191 | 1.30M | if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ONE_PRE)) |
192 | 36.0k | MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, |
193 | 36.0k | pc - g->bytecode, MVM_SPESH_ANN_DEOPT_ONE_INS); |
194 | 1.30M | |
195 | 1.30M | /* Let's see if we have a line-number annotation. */ |
196 | 1.30M | if (ann_ptr && pc - sf->body.bytecode == ann_ptr->bytecode_offset) { |
197 | 10.8k | MVMSpeshAnn *lineno_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
198 | 10.8k | lineno_ann->next = ins_node->annotations; |
199 | 10.8k | lineno_ann->type = MVM_SPESH_ANN_LINENO; |
200 | 10.8k | lineno_ann->data.lineno.filename_string_index = ann_ptr->filename_string_heap_index; |
201 | 10.8k | lineno_ann->data.lineno.line_number = ann_ptr->line_number; |
202 | 10.8k | ins_node->annotations = lineno_ann; |
203 | 10.8k | |
204 | 10.8k | MVM_bytecode_advance_annotation(tc, &sf->body, ann_ptr); |
205 | 10.8k | } |
206 | 1.30M | |
207 | 1.30M | /* Go over operands. */ |
208 | 1.30M | ins_node->operands = MVM_spesh_alloc(tc, g, info->num_operands * sizeof(MVMSpeshOperand)); |
209 | 4.11M | for (i = 0; i < info->num_operands; i++) { |
210 | 2.80M | MVMuint8 flags = info->operands[i]; |
211 | 2.80M | MVMuint8 rw = flags & MVM_operand_rw_mask; |
212 | 2.80M | switch (rw) { |
213 | 2.03M | case MVM_operand_read_reg: |
214 | 2.03M | case MVM_operand_write_reg: |
215 | 2.03M | ins_node->operands[i].reg.orig = GET_UI16(args, arg_size); |
216 | 2.03M | arg_size += 2; |
217 | 2.03M | break; |
218 | 14.7k | case MVM_operand_read_lex: |
219 | 14.7k | case MVM_operand_write_lex: |
220 | 14.7k | ins_node->operands[i].lex.idx = GET_UI16(args, arg_size); |
221 | 14.7k | ins_node->operands[i].lex.outers = GET_UI16(args, arg_size + 2); |
222 | 14.7k | arg_size += 4; |
223 | 14.7k | break; |
224 | 764k | case MVM_operand_literal: { |
225 | 764k | MVMuint32 type = flags & MVM_operand_type_mask; |
226 | 764k | switch (type) { |
227 | 0 | case MVM_operand_int8: |
228 | 0 | ins_node->operands[i].lit_i8 = GET_I8(args, arg_size); |
229 | 0 | arg_size += 1; |
230 | 0 | break; |
231 | 370k | case MVM_operand_int16: |
232 | 370k | ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); |
233 | 370k | arg_size += 2; |
234 | 370k | break; |
235 | 22 | case MVM_operand_int32: |
236 | 22 | ins_node->operands[i].lit_i32 = GET_I32(args, arg_size); |
237 | 22 | arg_size += 4; |
238 | 22 | break; |
239 | 4.84k | case MVM_operand_uint32: |
240 | 4.84k | ins_node->operands[i].lit_ui32 = GET_UI32(args, arg_size); |
241 | 4.84k | arg_size += 4; |
242 | 4.84k | break; |
243 | 4.41k | case MVM_operand_int64: |
244 | 4.41k | ins_node->operands[i].lit_i64 = MVM_BC_get_I64(args, arg_size); |
245 | 4.41k | arg_size += 8; |
246 | 4.41k | break; |
247 | 0 | case MVM_operand_num32: |
248 | 0 | ins_node->operands[i].lit_n32 = GET_N32(args, arg_size); |
249 | 0 | arg_size += 4; |
250 | 0 | break; |
251 | 102 | case MVM_operand_num64: |
252 | 102 | ins_node->operands[i].lit_n64 = MVM_BC_get_N64(args, arg_size); |
253 | 102 | arg_size += 8; |
254 | 102 | break; |
255 | 38.7k | case MVM_operand_callsite: |
256 | 38.7k | ins_node->operands[i].callsite_idx = GET_UI16(args, arg_size); |
257 | 38.7k | arg_size += 2; |
258 | 38.7k | break; |
259 | 511 | case MVM_operand_coderef: |
260 | 511 | ins_node->operands[i].coderef_idx = GET_UI16(args, arg_size); |
261 | 511 | arg_size += 2; |
262 | 511 | break; |
263 | 147k | case MVM_operand_str: |
264 | 147k | ins_node->operands[i].lit_str_idx = GET_UI32(args, arg_size); |
265 | 147k | arg_size += 4; |
266 | 147k | break; |
267 | 155k | case MVM_operand_ins: { |
268 | 155k | /* Stash instruction offset. */ |
269 | 155k | MVMuint32 target = GET_UI32(args, arg_size); |
270 | 155k | ins_node->operands[i].ins_offset = target; |
271 | 155k | |
272 | 155k | /* This is a branching instruction, so it's a BB end. */ |
273 | 155k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; |
274 | 155k | |
275 | 155k | /* Its target is a BB start, and any previous instruction |
276 | 155k | * we already passed needs marking as a BB end. */ |
277 | 155k | byte_to_ins_flags[target] |= MVM_CFG_BB_START; |
278 | 155k | if (target > 0 && target < pc - g->bytecode) { |
279 | 90.4k | while (!byte_to_ins_flags[--target]); |
280 | 14.0k | byte_to_ins_flags[target] |= MVM_CFG_BB_END; |
281 | 14.0k | } |
282 | 155k | |
283 | 155k | /* Next instruction is also a BB start. */ |
284 | 155k | next_bbs = 1; |
285 | 155k | |
286 | 155k | arg_size += 4; |
287 | 155k | break; |
288 | 0 | } |
289 | 41.7k | case MVM_operand_spesh_slot: |
290 | 41.7k | ins_node->operands[i].lit_i16 = GET_I16(args, arg_size); |
291 | 41.7k | arg_size += 2; |
292 | 41.7k | break; |
293 | 0 | default: |
294 | 0 | MVM_oops(tc, |
295 | 0 | "Spesh: unknown operand type %d in graph building (op %s)", |
296 | 0 | (int)type, ins_node->info->name); |
297 | 764k | } |
298 | 764k | break; |
299 | 0 | default: |
300 | 0 | break; |
301 | 764k | } |
302 | 2.80M | } |
303 | 2.80M | } |
304 | 1.30M | |
305 | 1.30M | /* We specially handle the jumplist case, which needs to mark all of |
306 | 1.30M | * the possible places we could jump to in the following instructions |
307 | 1.30M | * as starts of basic blocks. It is, in itself, the end of one. Note |
308 | 1.30M | * we jump to the instruction after the n jump points if none match, |
309 | 1.30M | * so that is marked too. */ |
310 | 1.30M | if (opcode == MVM_OP_jumplist) { |
311 | 713 | MVMint64 n = MVM_BC_get_I64(args, 0); |
312 | 7.40k | for (i = 0; i <= n; i++) |
313 | 6.68k | byte_to_ins_flags[(pc - g->bytecode) + 12 + i * 6] |= |
314 | 6.68k | MVM_CFG_BB_START | MVM_CFG_BB_JUMPLIST; |
315 | 713 | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; |
316 | 713 | } |
317 | 1.30M | |
318 | 1.30M | /* Invoke and return end a basic block. Anything that is marked as |
319 | 1.30M | * invokish and throwish are also basic block ends. OSR points are |
320 | 1.30M | * basic block starts. */ |
321 | 1.30M | switch (opcode) { |
322 | 59.5k | case MVM_OP_invoke_v: |
323 | 59.5k | case MVM_OP_invoke_i: |
324 | 59.5k | case MVM_OP_invoke_n: |
325 | 59.5k | case MVM_OP_invoke_s: |
326 | 59.5k | case MVM_OP_invoke_o: |
327 | 59.5k | case MVM_OP_return_i: |
328 | 59.5k | case MVM_OP_return_n: |
329 | 59.5k | case MVM_OP_return_s: |
330 | 59.5k | case MVM_OP_return_o: |
331 | 59.5k | case MVM_OP_return: |
332 | 59.5k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; |
333 | 59.5k | next_bbs = 1; |
334 | 59.5k | break; |
335 | 5.04k | case MVM_OP_osrpoint: |
336 | 5.04k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_START; |
337 | 5.04k | if (pc - g->bytecode > 0) { |
338 | 5.04k | MVMuint32 prev = pc - g->bytecode; |
339 | 40.2k | while (!byte_to_ins_flags[--prev]); |
340 | 5.04k | byte_to_ins_flags[prev] |= MVM_CFG_BB_END; |
341 | 5.04k | } |
342 | 5.04k | num_osr_points++; |
343 | 5.04k | break; |
344 | 1.24M | default: |
345 | 1.24M | if (info->jittivity & (MVM_JIT_INFO_THROWISH | MVM_JIT_INFO_INVOKISH)) { |
346 | 197k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; |
347 | 197k | next_bbs = 1; |
348 | 197k | } |
349 | 1.24M | break; |
350 | 1.30M | } |
351 | 1.30M | |
352 | 1.30M | /* Final instruction is basic block end. */ |
353 | 1.30M | if (pc + 2 + arg_size == end) |
354 | 21.1k | byte_to_ins_flags[pc - g->bytecode] |= MVM_CFG_BB_END; |
355 | 1.30M | |
356 | 1.30M | /* If the instruction is logged, store its program counter so we can |
357 | 1.30M | * associate it with a static value later. */ |
358 | 1.30M | if (info->logged) |
359 | 86.2k | add_logged_annotation(tc, g, ins_node, pc); |
360 | 1.30M | |
361 | 1.30M | /* Caculate next instruction's PC. */ |
362 | 1.30M | pc += 2 + arg_size; |
363 | 1.30M | |
364 | 1.30M | /* If this is a post-instruction deopt point opcode... */ |
365 | 1.30M | if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ONE)) |
366 | 183k | MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, |
367 | 183k | pc - g->bytecode, MVM_SPESH_ANN_DEOPT_ONE_INS); |
368 | 1.30M | if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_ALL)) |
369 | 36.0k | MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, |
370 | 36.0k | pc - g->bytecode, MVM_SPESH_ANN_DEOPT_ALL_INS); |
371 | 1.30M | if (!existing_deopts && (info->deopt_point & MVM_DEOPT_MARK_OSR)) |
372 | 5.04k | MVM_spesh_graph_add_deopt_annotation(tc, g, ins_node, |
373 | 5.04k | pc - g->bytecode, MVM_SPESH_ANN_DEOPT_OSR); |
374 | 1.30M | |
375 | 1.30M | /* Go to next instruction. */ |
376 | 1.30M | ins_idx++; |
377 | 1.30M | } |
378 | 21.1k | |
379 | 21.1k | /* Annotate instructions that are handler-significant. */ |
380 | 31.4k | for (i = 0; i < g->num_handlers; i++) { |
381 | 10.2k | /* Start or got may be -1 if the code the handler covered became |
382 | 10.2k | * dead. If so, mark the handler as removed. */ |
383 | 10.2k | if (g->handlers[i].start_offset == -1 || g->handlers[i].goto_offset == -1) { |
384 | 0 | if (!g->unreachable_handlers) |
385 | 0 | g->unreachable_handlers = MVM_spesh_alloc(tc, g, g->num_handlers); |
386 | 0 | g->unreachable_handlers[i] = 1; |
387 | 0 | } |
388 | 10.2k | else { |
389 | 10.2k | MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->handlers[i].start_offset] >> 3]; |
390 | 10.2k | MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->handlers[i].end_offset] >> 3]; |
391 | 10.2k | MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
392 | 10.2k | MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
393 | 10.2k | MVMSpeshIns *goto_ins = ins_flat[byte_to_ins_flags[g->handlers[i].goto_offset] >> 3]; |
394 | 10.2k | MVMSpeshAnn *goto_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
395 | 10.2k | |
396 | 10.2k | start_ann->next = start_ins->annotations; |
397 | 10.2k | start_ann->type = MVM_SPESH_ANN_FH_START; |
398 | 10.2k | start_ann->data.frame_handler_index = i; |
399 | 10.2k | start_ins->annotations = start_ann; |
400 | 10.2k | |
401 | 10.2k | end_ann->next = end_ins->annotations; |
402 | 10.2k | end_ann->type = MVM_SPESH_ANN_FH_END; |
403 | 10.2k | end_ann->data.frame_handler_index = i; |
404 | 10.2k | end_ins->annotations = end_ann; |
405 | 10.2k | |
406 | 10.2k | goto_ann->next = goto_ins->annotations; |
407 | 10.2k | goto_ann->type = MVM_SPESH_ANN_FH_GOTO; |
408 | 10.2k | goto_ann->data.frame_handler_index = i; |
409 | 10.2k | goto_ins->annotations = goto_ann; |
410 | 10.2k | } |
411 | 10.2k | } |
412 | 21.1k | |
413 | 21.1k | /* Annotate instructions that are inline start/end points. */ |
414 | 22.1k | for (i = 0; i < g->num_inlines; i++) { |
415 | 996 | if (!g->inlines[i].unreachable) { |
416 | 996 | MVMSpeshIns *start_ins = ins_flat[byte_to_ins_flags[g->inlines[i].start] >> 3]; |
417 | 996 | MVMSpeshIns *end_ins = ins_flat[byte_to_ins_flags[g->inlines[i].end] >> 3]; |
418 | 996 | MVMSpeshAnn *start_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
419 | 996 | MVMSpeshAnn *end_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
420 | 996 | |
421 | 996 | start_ann->next = start_ins->annotations; |
422 | 996 | start_ann->type = MVM_SPESH_ANN_INLINE_START; |
423 | 996 | start_ann->data.inline_idx = i; |
424 | 996 | start_ins->annotations = start_ann; |
425 | 996 | |
426 | 996 | end_ann->next = end_ins->annotations; |
427 | 996 | end_ann->type = MVM_SPESH_ANN_INLINE_END; |
428 | 996 | end_ann->data.inline_idx = i; |
429 | 996 | end_ins->annotations = end_ann; |
430 | 996 | } |
431 | 996 | } |
432 | 21.1k | |
433 | 21.1k | /* Now for the second pass, where we assemble the basic blocks. Also we |
434 | 21.1k | * build a lookup table of instructions that start a basic block to that |
435 | 21.1k | * basic block, for the final CFG construction. We make the entry block a |
436 | 21.1k | * special one, containing a noop; it will have any catch exception |
437 | 21.1k | * handler targets linked from it, so they show up in the graph. For any |
438 | 21.1k | * control exceptions, we will insert */ |
439 | 21.1k | g->entry = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); |
440 | 21.1k | g->entry->first_ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); |
441 | 21.1k | g->entry->first_ins->info = get_op_info(tc, cu, 0); |
442 | 21.1k | g->entry->last_ins = g->entry->first_ins; |
443 | 21.1k | g->entry->idx = 0; |
444 | 21.1k | cur_bb = NULL; |
445 | 21.1k | prev_bb = g->entry; |
446 | 21.1k | last_ins = NULL; |
447 | 21.1k | ins_to_bb = MVM_calloc(ins_idx, sizeof(MVMSpeshBB *)); |
448 | 21.1k | ins_idx = 0; |
449 | 21.1k | bb_idx = 1; |
450 | 8.90M | for (i = 0; i < g->bytecode_size; i++) { |
451 | 8.88M | MVMSpeshIns *cur_ins; |
452 | 8.88M | |
453 | 8.88M | /* Skip zeros; no instruction here. */ |
454 | 8.88M | if (!byte_to_ins_flags[i]) |
455 | 7.58M | continue; |
456 | 8.88M | |
457 | 8.88M | /* Get current instruction. */ |
458 | 1.30M | cur_ins = ins_flat[byte_to_ins_flags[i] >> 3]; |
459 | 1.30M | |
460 | 1.30M | /* Start of a basic block? */ |
461 | 1.30M | if (byte_to_ins_flags[i] & MVM_CFG_BB_START) { |
462 | 476k | /* Should not already be in a basic block. */ |
463 | 476k | if (cur_bb) { |
464 | 0 | MVM_spesh_graph_destroy(tc, g); |
465 | 0 | MVM_oops(tc, "Spesh: confused during basic block analysis (in block)"); |
466 | 0 | } |
467 | 476k | |
468 | 476k | /* Create it, and set first instruction and index. */ |
469 | 476k | cur_bb = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshBB)); |
470 | 476k | cur_bb->first_ins = cur_ins; |
471 | 476k | cur_bb->idx = bb_idx; |
472 | 476k | cur_bb->initial_pc = i; |
473 | 476k | cur_bb->jumplist = byte_to_ins_flags[i] & MVM_CFG_BB_JUMPLIST; |
474 | 476k | bb_idx++; |
475 | 476k | |
476 | 476k | /* Record instruction -> BB start mapping. */ |
477 | 476k | ins_to_bb[ins_idx] = cur_bb; |
478 | 476k | |
479 | 476k | /* Link it to the previous one. */ |
480 | 476k | prev_bb->linear_next = cur_bb; |
481 | 476k | } |
482 | 1.30M | |
483 | 1.30M | /* Should always be in a BB at this point. */ |
484 | 1.30M | if (!cur_bb) { |
485 | 0 | MVM_spesh_graph_destroy(tc, g); |
486 | 0 | MVM_oops(tc, "Spesh: confused during basic block analysis (no block)"); |
487 | 0 | } |
488 | 1.30M | |
489 | 1.30M | /* Add instruction into double-linked per-block instruction list. */ |
490 | 1.30M | if (last_ins) { |
491 | 827k | last_ins->next = cur_ins; |
492 | 827k | cur_ins->prev = last_ins; |
493 | 827k | } |
494 | 1.30M | last_ins = cur_ins; |
495 | 1.30M | |
496 | 1.30M | /* End of a basic block? */ |
497 | 1.30M | if (byte_to_ins_flags[i] & MVM_CFG_BB_END) { |
498 | 476k | cur_bb->last_ins = cur_ins; |
499 | 476k | prev_bb = cur_bb; |
500 | 476k | cur_bb = NULL; |
501 | 476k | last_ins = NULL; |
502 | 476k | } |
503 | 1.30M | |
504 | 1.30M | ins_idx++; |
505 | 1.30M | } |
506 | 21.1k | g->num_bbs = bb_idx; |
507 | 21.1k | |
508 | 21.1k | /* Finally, link the basic blocks up to form a CFG. Along the way, any of |
509 | 21.1k | * the instruction operands get the target BB stored. This is where we |
510 | 21.1k | * link basic blocks covered by control exception handlers to the goto |
511 | 21.1k | * block of the handler also. */ |
512 | 21.1k | cur_bb = g->entry; |
513 | 518k | while (cur_bb) { |
514 | 497k | /* If it's the first block, it's a special case; successors are the |
515 | 497k | * real successor, all catch exception handlers, and all OSR points. |
516 | 497k | */ |
517 | 497k | if (cur_bb == g->entry) { |
518 | 21.1k | MVMint32 num_bbs = 1 + g->num_handlers + num_osr_points; |
519 | 21.1k | MVMint32 insert_pos = 1; |
520 | 21.1k | cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); |
521 | 21.1k | cur_bb->handler_succ = MVM_spesh_alloc(tc, g, g->num_handlers * sizeof(MVMSpeshBB *)); |
522 | 21.1k | cur_bb->succ[0] = cur_bb->linear_next; |
523 | 31.4k | for (i = 0; i < g->num_handlers; i++) { |
524 | 10.2k | if (is_catch_handler(tc, g, i)) { |
525 | 16 | MVMuint32 offset = g->handlers[i].goto_offset; |
526 | 16 | if (offset != -1) |
527 | 16 | cur_bb->succ[insert_pos++] = ins_to_bb[byte_to_ins_flags[offset] >> 3]; |
528 | 16 | } |
529 | 10.2k | } |
530 | 21.1k | if (num_osr_points > 0) { |
531 | 2.11k | MVMSpeshBB *search_bb = cur_bb->linear_next; |
532 | 256k | while (search_bb) { |
533 | 254k | if (search_bb->first_ins->info->opcode == MVM_OP_osrpoint) |
534 | 5.04k | cur_bb->succ[insert_pos++] = search_bb; |
535 | 254k | search_bb = search_bb->linear_next; |
536 | 254k | } |
537 | 2.11k | } |
538 | 21.1k | cur_bb->num_succ = insert_pos; |
539 | 21.1k | } |
540 | 497k | |
541 | 497k | /* Otherwise, non-entry basic block. */ |
542 | 476k | else { |
543 | 476k | /* If this is the start of a frame handler that is not a catch, |
544 | 476k | * mark it as an active handler. Unmark those where we see the |
545 | 476k | * end of the handler. */ |
546 | 476k | if (cur_bb->first_ins->annotations) { |
547 | 112k | MVMSpeshAnn *ann = cur_bb->first_ins->annotations; |
548 | 261k | while (ann) { |
549 | 149k | switch (ann->type) { |
550 | 10.2k | case MVM_SPESH_ANN_FH_START: |
551 | 10.2k | if (!is_catch_handler(tc, g, ann->data.frame_handler_index)) { |
552 | 10.2k | active_handlers[ann->data.frame_handler_index] = 1; |
553 | 10.2k | num_active_handlers++; |
554 | 10.2k | } |
555 | 10.2k | break; |
556 | 10.2k | case MVM_SPESH_ANN_FH_END: |
557 | 10.2k | if (!is_catch_handler(tc, g, ann->data.frame_handler_index)) { |
558 | 10.2k | active_handlers[ann->data.frame_handler_index] = 0; |
559 | 10.2k | num_active_handlers--; |
560 | 10.2k | } |
561 | 10.2k | break; |
562 | 149k | } |
563 | 149k | ann = ann->next; |
564 | 149k | } |
565 | 112k | } |
566 | 476k | |
567 | 476k | /* Consider the last instruction, to see how we leave the BB. */ |
568 | 476k | switch (cur_bb->last_ins->info->opcode) { |
569 | 713 | case MVM_OP_jumplist: { |
570 | 713 | /* Jumplist, so successors are next N+1 basic blocks. */ |
571 | 713 | MVMint64 jump_bbs = cur_bb->last_ins->operands[0].lit_i64 + 1; |
572 | 713 | MVMint64 num_bbs = jump_bbs + num_active_handlers; |
573 | 713 | MVMSpeshBB *bb_to_add = cur_bb->linear_next; |
574 | 713 | cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); |
575 | 7.40k | for (i = 0; i < jump_bbs; i++) { |
576 | 6.68k | cur_bb->succ[i] = bb_to_add; |
577 | 6.68k | bb_to_add = bb_to_add->linear_next; |
578 | 6.68k | } |
579 | 713 | cur_bb->num_succ = jump_bbs; |
580 | 713 | } |
581 | 713 | break; |
582 | 58.2k | case MVM_OP_goto: { |
583 | 58.2k | /* Unconditional branch, so one successor. */ |
584 | 58.2k | MVMint64 num_bbs = 1 + num_active_handlers; |
585 | 58.2k | MVMuint32 offset = cur_bb->last_ins->operands[0].ins_offset; |
586 | 58.2k | MVMSpeshBB *tgt = ins_to_bb[byte_to_ins_flags[offset] >> 3]; |
587 | 58.2k | cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); |
588 | 58.2k | cur_bb->succ[0] = tgt; |
589 | 58.2k | cur_bb->num_succ = 1; |
590 | 58.2k | cur_bb->last_ins->operands[0].ins_bb = tgt; |
591 | 58.2k | } |
592 | 58.2k | break; |
593 | 417k | default: { |
594 | 417k | /* Probably conditional branch, so two successors: one from |
595 | 417k | * the instruction, another from fall-through. Or may just be |
596 | 417k | * a non-branch that exits for other reasons. */ |
597 | 417k | MVMint64 num_bbs = 2 + num_active_handlers; |
598 | 417k | cur_bb->succ = MVM_spesh_alloc(tc, g, num_bbs * sizeof(MVMSpeshBB *)); |
599 | 1.28M | for (i = 0; i < cur_bb->last_ins->info->num_operands; i++) { |
600 | 868k | if (cur_bb->last_ins->info->operands[i] == MVM_operand_ins) { |
601 | 96.9k | MVMuint32 offset = cur_bb->last_ins->operands[i].ins_offset; |
602 | 96.9k | cur_bb->succ[0] = ins_to_bb[byte_to_ins_flags[offset] >> 3]; |
603 | 96.9k | cur_bb->num_succ++; |
604 | 96.9k | cur_bb->last_ins->operands[i].ins_bb = cur_bb->succ[0]; |
605 | 96.9k | } |
606 | 868k | } |
607 | 417k | if (cur_bb->num_succ > 1) { |
608 | 0 | /* If we ever get instructions with multiple targets, this |
609 | 0 | * area of the code needs an update. */ |
610 | 0 | MVM_spesh_graph_destroy(tc, g); |
611 | 0 | MVM_oops(tc, "Spesh: unhandled multi-target branch"); |
612 | 0 | } |
613 | 417k | if (cur_bb->linear_next) { |
614 | 395k | cur_bb->succ[cur_bb->num_succ] = cur_bb->linear_next; |
615 | 395k | cur_bb->num_succ++; |
616 | 395k | } |
617 | 417k | } |
618 | 417k | break; |
619 | 476k | } |
620 | 476k | |
621 | 476k | /* Attach this block to the goto block of any active handlers. */ |
622 | 476k | if ( |
623 | 476k | num_active_handlers |
624 | 167k | && ( |
625 | 167k | cur_bb->last_ins->info->jittivity & (MVM_JIT_INFO_THROWISH | MVM_JIT_INFO_INVOKISH) |
626 | 94.1k | || cur_bb->last_ins->info->opcode == MVM_OP_invoke_v |
627 | 94.0k | || cur_bb->last_ins->info->opcode == MVM_OP_invoke_i |
628 | 94.0k | || cur_bb->last_ins->info->opcode == MVM_OP_invoke_n |
629 | 94.0k | || cur_bb->last_ins->info->opcode == MVM_OP_invoke_s |
630 | 94.0k | || cur_bb->last_ins->info->opcode == MVM_OP_invoke_o |
631 | 167k | ) |
632 | 87.5k | ) { |
633 | 87.5k | cur_bb->handler_succ = MVM_spesh_alloc(tc, g, num_active_handlers * sizeof(MVMSpeshBB *)); |
634 | 945k | for (i = 0; i < g->num_handlers; i++) { |
635 | 857k | if (active_handlers[i]) { |
636 | 242k | MVMuint32 offset = g->handlers[i].goto_offset; |
637 | 242k | MVMSpeshBB *target = ins_to_bb[byte_to_ins_flags[offset] >> 3]; |
638 | 242k | if (!already_succs(tc, cur_bb, target)) { |
639 | 238k | cur_bb->succ[cur_bb->num_succ] = target; |
640 | 238k | cur_bb->num_succ++; |
641 | 238k | cur_bb->handler_succ[cur_bb->num_handler_succ++] = target; |
642 | 238k | } |
643 | 242k | } |
644 | 857k | } |
645 | 87.5k | } |
646 | 476k | else |
647 | 388k | cur_bb->handler_succ = NULL; |
648 | 476k | } |
649 | 497k | |
650 | 497k | /* Move on to next block. */ |
651 | 497k | cur_bb = cur_bb->linear_next; |
652 | 497k | } |
653 | 21.1k | |
654 | 21.1k | /* If we're building the graph for optimized bytecode, insert existing |
655 | 21.1k | * deopt points. */ |
656 | 21.1k | if (existing_deopts) { |
657 | 101k | for (i = 0; i < num_existing_deopts; i ++) { |
658 | 91.0k | if (existing_deopts[2 * i + 1] >= 0) { |
659 | 73.1k | MVMSpeshIns *post_ins = ins_flat[byte_to_ins_flags[existing_deopts[2 * i + 1]] >> 3]; |
660 | 64.1k | MVMSpeshIns *deopt_ins = post_ins->prev ? post_ins->prev : |
661 | 9.01k | MVM_spesh_graph_linear_prev(tc, g, |
662 | 9.01k | ins_to_bb[byte_to_ins_flags[existing_deopts[2 * i + 1]] >> 3])->last_ins; |
663 | 73.1k | MVMSpeshAnn *deopt_ann = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshAnn)); |
664 | 73.1k | deopt_ann->next = deopt_ins->annotations; |
665 | 73.1k | deopt_ann->type = MVM_SPESH_ANN_DEOPT_INLINE; |
666 | 73.1k | deopt_ann->data.deopt_idx = i; |
667 | 73.1k | deopt_ins->annotations = deopt_ann; |
668 | 73.1k | } |
669 | 91.0k | } |
670 | 10.0k | } |
671 | 21.1k | |
672 | 21.1k | /* Clear up the temporary arrays. */ |
673 | 21.1k | MVM_free(byte_to_ins_flags); |
674 | 21.1k | MVM_free(ins_flat); |
675 | 21.1k | MVM_free(ins_to_bb); |
676 | 21.1k | MVM_free(ann_ptr); |
677 | 21.1k | MVM_free(active_handlers); |
678 | 21.1k | } |
679 | | |
680 | | /* Inserts nulling of object reigsters. A later stage of the optimizer will |
681 | | * throw out any that are unrequired, leaving only those that cover (rare) |
682 | | * "register read before assigned" cases. (We can thus just start off with |
683 | | * them NULL, since zeroed memory is cheaper than copying a VMNull in to |
684 | | * place). */ |
685 | 105k | static MVMint32 is_handler_reg(MVMThreadContext *tc, MVMSpeshGraph *g, MVMuint16 reg) { |
686 | 105k | MVMuint32 num_handlers = g->num_handlers; |
687 | 105k | MVMuint32 i; |
688 | 409k | for (i = 0; i < num_handlers; i++) |
689 | 304k | if (g->handlers[i].action == MVM_EX_ACTION_INVOKE) |
690 | 274 | if (g->handlers[i].block_reg == reg) |
691 | 15 | return 1; |
692 | 105k | return 0; |
693 | 105k | } |
694 | 11.1k | static void insert_object_null_instructions(MVMThreadContext *tc, MVMSpeshGraph *g) { |
695 | 11.1k | MVMSpeshBB *insert_bb = g->entry->linear_next; |
696 | 11.1k | MVMuint16 *local_types = g->sf->body.local_types; |
697 | 11.1k | MVMuint16 num_locals = g->sf->body.num_locals; |
698 | 11.1k | MVMuint16 i; |
699 | 11.1k | MVMSpeshIns *insert_after = NULL; |
700 | 11.1k | if (insert_bb->first_ins && insert_bb->first_ins->info->opcode == MVM_OP_prof_enter) { |
701 | 0 | insert_after = insert_bb->first_ins; |
702 | 0 | } |
703 | 181k | for (i = 0; i < num_locals; i++) { |
704 | 170k | if (local_types[i] == MVM_reg_obj && !is_handler_reg(tc, g, i)) { |
705 | 105k | MVMSpeshIns *null_ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); |
706 | 105k | null_ins->info = MVM_op_get_op(MVM_OP_null); |
707 | 105k | null_ins->operands = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshOperand)); |
708 | 105k | null_ins->operands[0].reg.orig = i; |
709 | 105k | MVM_spesh_manipulate_insert_ins(tc, insert_bb, insert_after, null_ins); |
710 | 105k | } |
711 | 170k | } |
712 | 11.1k | } |
713 | | |
714 | | /* Annotates the control flow graph with predecessors. */ |
715 | 32.2k | static void add_predecessors(MVMThreadContext *tc, MVMSpeshGraph *g) { |
716 | 32.2k | MVMSpeshBB *cur_bb = g->entry; |
717 | 851k | while (cur_bb) { |
718 | 819k | MVMuint16 i; |
719 | 2.25M | for (i = 0; i < cur_bb->num_succ; i++) { |
720 | 1.43M | MVMSpeshBB *tgt = cur_bb->succ[i]; |
721 | 1.43M | MVMSpeshBB **new_pred = MVM_spesh_alloc(tc, g, |
722 | 1.43M | (tgt->num_pred + 1) * sizeof(MVMSpeshBB *)); |
723 | 1.43M | if (tgt->num_pred) |
724 | 649k | memcpy(new_pred, tgt->pred, tgt->num_pred * sizeof(MVMSpeshBB *)); |
725 | 1.43M | new_pred[tgt->num_pred] = cur_bb; |
726 | 1.43M | tgt->pred = new_pred; |
727 | 1.43M | tgt->num_pred++; |
728 | 1.43M | } |
729 | 819k | cur_bb = cur_bb->linear_next; |
730 | 819k | } |
731 | 32.2k | } |
732 | | |
733 | | /* Produces an array of the basic blocks, sorted in reverse postorder from |
734 | | * the entry point. */ |
735 | 819k | static void dfs(MVMSpeshBB **rpo, MVMint32 *insert_pos, MVMuint8 *seen, MVMSpeshBB *bb) { |
736 | 819k | MVMint32 i; |
737 | 819k | seen[bb->idx] = 1; |
738 | 2.25M | for (i = 0; i < bb->num_succ; i++) { |
739 | 1.43M | MVMSpeshBB *succ = bb->succ[i]; |
740 | 1.43M | if (!seen[succ->idx]) |
741 | 787k | dfs(rpo, insert_pos, seen, succ); |
742 | 1.43M | } |
743 | 819k | rpo[*insert_pos] = bb; |
744 | 819k | bb->rpo_idx = *insert_pos; |
745 | 819k | (*insert_pos)--; |
746 | 819k | } |
747 | 32.2k | static MVMSpeshBB ** reverse_postorder(MVMThreadContext *tc, MVMSpeshGraph *g) { |
748 | 32.2k | MVMSpeshBB **rpo = MVM_calloc(g->num_bbs, sizeof(MVMSpeshBB *)); |
749 | 32.2k | MVMuint8 *seen = MVM_calloc(g->num_bbs, 1); |
750 | 32.2k | MVMint32 ins = g->num_bbs - 1; |
751 | 32.2k | dfs(rpo, &ins, seen, g->entry); |
752 | 32.2k | MVM_free(seen); |
753 | 32.2k | if (ins != -1) { |
754 | 0 | char *dump_msg = MVM_spesh_dump(tc, g); |
755 | 0 | printf("%s", dump_msg); |
756 | 0 | MVM_free(dump_msg); |
757 | 0 | MVM_spesh_graph_destroy(tc, g); |
758 | 0 | MVM_oops(tc, "Spesh: reverse postorder calculation failed"); |
759 | 0 | } |
760 | 32.2k | return rpo; |
761 | 32.2k | } |
762 | | |
763 | | /* 2-finger intersection algorithm, to find new immediate dominator. */ |
764 | 18.7M | static void iter_check(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms, MVMint32 iters) { |
765 | 18.7M | if (iters > 100000) { |
766 | 0 | #ifdef NDEBUG |
767 | | MVMint32 k; |
768 | | char *dump_msg = MVM_spesh_dump(tc, g); |
769 | | printf("%s", dump_msg); |
770 | | MVM_free(dump_msg); |
771 | | printf("RPO: "); |
772 | | for (k = 0; k < g->num_bbs; k++) |
773 | | printf("%d, ", rpo[k]->idx); |
774 | | printf("\n"); |
775 | | printf("Doms: "); |
776 | | for (k = 0; k < g->num_bbs; k++) |
777 | | printf("%d (%d), ", doms[k], doms[k] >= 0 ? rpo[doms[k]]->idx : -1); |
778 | | printf("\n"); |
779 | | #endif |
780 | 0 | MVM_spesh_graph_destroy(tc, g); |
781 | 0 | MVM_oops(tc, "Spesh: dominator intersection went infinite"); |
782 | 0 | } |
783 | 18.7M | } |
784 | 1.60M | static MVMint32 intersect(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms, MVMint32 finger1, MVMint32 finger2) { |
785 | 1.60M | MVMint32 iters = 0; |
786 | 3.22M | while (finger1 != finger2) { |
787 | 19.7M | while (finger1 > finger2) { |
788 | 18.0M | iter_check(tc, g, rpo, doms, iters++); |
789 | 18.0M | finger1 = doms[finger1]; |
790 | 18.0M | } |
791 | 2.27M | while (finger2 > finger1) { |
792 | 653k | iter_check(tc, g, rpo, doms, iters++); |
793 | 653k | finger2 = doms[finger2]; |
794 | 653k | } |
795 | 1.62M | } |
796 | 1.60M | return finger1; |
797 | 1.60M | } |
798 | | |
799 | | /* Computes dominator information about the basic blocks. */ |
800 | 32.2k | static MVMint32 * compute_dominators(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo) { |
801 | 32.2k | MVMint32 i, j, changed; |
802 | 32.2k | |
803 | 32.2k | /* Create result list, with all initialized to undefined (use -1, as it's |
804 | 32.2k | * not a valid basic block index). Start node dominates itself. */ |
805 | 32.2k | MVMint32 *doms = MVM_malloc(g->num_bbs * sizeof(MVMint32)); |
806 | 32.2k | doms[0] = 0; |
807 | 819k | for (i = 1; i < g->num_bbs; i++) |
808 | 787k | doms[i] = -1; |
809 | 32.2k | |
810 | 32.2k | /* Iterate to fixed point. */ |
811 | 32.2k | changed = 1; |
812 | 101k | while (changed) { |
813 | 68.7k | changed = 0; |
814 | 68.7k | |
815 | 68.7k | /* Visit all except the start node in reverse postorder. */ |
816 | 2.09M | for (i = 1; i < g->num_bbs; i++) { |
817 | 2.02M | MVMSpeshBB *b = rpo[i]; |
818 | 2.02M | |
819 | 2.02M | /* See if there's a better dominator. */ |
820 | 2.02M | MVMint32 chosen_pred = -1; |
821 | 2.02M | MVMint32 new_idom; |
822 | 2.02M | for (j = 0; j < b->num_pred; j++) { |
823 | 2.02M | new_idom = b->pred[j]->rpo_idx; |
824 | 2.02M | if (doms[new_idom] != -1) |
825 | 2.02M | { |
826 | 2.02M | chosen_pred = j; |
827 | 2.02M | break; |
828 | 2.02M | } |
829 | 2.02M | } |
830 | 2.02M | if (chosen_pred == -1) { |
831 | 0 | MVM_spesh_graph_destroy(tc, g); |
832 | 0 | MVM_oops(tc, "Spesh: could not find processed initial dominator"); |
833 | 0 | } |
834 | 5.90M | for (j = 0; j < b->num_pred; j++) { |
835 | 3.87M | if (j != chosen_pred) { |
836 | 1.84M | MVMint32 p_idx = b->pred[j]->rpo_idx; |
837 | 1.84M | if (doms[p_idx] != -1) |
838 | 1.60M | new_idom = intersect(tc, g, rpo, doms, p_idx, new_idom); |
839 | 1.84M | } |
840 | 3.87M | } |
841 | 2.02M | if (doms[i] != new_idom) { |
842 | 799k | doms[i] = new_idom; |
843 | 799k | changed = 1; |
844 | 799k | } |
845 | 2.02M | } |
846 | 68.7k | } |
847 | 32.2k | |
848 | 32.2k | return doms; |
849 | 32.2k | } |
850 | | |
851 | | /* Builds the dominance tree children lists for each node. */ |
852 | 785k | static void add_child(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *target, MVMSpeshBB *to_add) { |
853 | 785k | MVMSpeshBB **new_children; |
854 | 785k | MVMint32 i; |
855 | 785k | |
856 | 785k | /* Already in the child list? */ |
857 | 1.41M | for (i = 0; i < target->num_children; i++) |
858 | 626k | if (target->children[i] == to_add) |
859 | 0 | return; |
860 | 785k | |
861 | 785k | /* Nope, so insert. */ |
862 | 785k | new_children = MVM_spesh_alloc(tc, g, (target->num_children + 1) * sizeof(MVMSpeshBB *)); |
863 | 785k | if (target->num_children) |
864 | 262k | memcpy(new_children, target->children, target->num_children * sizeof(MVMSpeshBB *)); |
865 | 785k | new_children[target->num_children] = to_add; |
866 | 785k | target->children = new_children; |
867 | 785k | target->num_children++; |
868 | 785k | } |
869 | 32.2k | static void add_children(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms) { |
870 | 32.2k | MVMint32 i; |
871 | 850k | for (i = 0; i < g->num_bbs; i++) { |
872 | 818k | MVMSpeshBB *bb = rpo[i]; |
873 | 818k | MVMint32 idom = doms[i]; |
874 | 818k | if (idom != i) |
875 | 785k | add_child(tc, g, rpo[idom], bb); |
876 | 818k | } |
877 | 32.2k | } |
878 | | |
879 | | /* Builds the dominance frontier set for each node. */ |
880 | 4.00M | static void add_to_frontier_set(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *target, MVMSpeshBB *to_add) { |
881 | 4.00M | MVMSpeshBB **new_df; |
882 | 4.00M | MVMint32 i; |
883 | 4.00M | |
884 | 4.00M | /* Already in the set? */ |
885 | 11.3M | for (i = 0; i < target->num_df; i++) |
886 | 10.5M | if (target->df[i] == to_add) |
887 | 3.25M | return; |
888 | 4.00M | |
889 | 4.00M | /* Nope, so insert. */ |
890 | 748k | new_df = MVM_spesh_alloc(tc, g, (target->num_df + 1) * sizeof(MVMSpeshBB *)); |
891 | 748k | if (target->num_df) |
892 | 355k | memcpy(new_df, target->df, target->num_df * sizeof(MVMSpeshBB *)); |
893 | 748k | new_df[target->num_df] = to_add; |
894 | 748k | target->df = new_df; |
895 | 748k | target->num_df++; |
896 | 748k | } |
897 | 21.1k | static void add_dominance_frontiers(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB **rpo, MVMint32 *doms) { |
898 | 21.1k | MVMint32 j; |
899 | 21.1k | MVMSpeshBB *b = g->entry; |
900 | 516k | while (b) { |
901 | 495k | if (b->num_pred >= 2) { /* Thus it's a join point. */ |
902 | 528k | for (j = 0; j < b->num_pred; j++) { |
903 | 436k | MVMint32 runner = b->pred[j]->rpo_idx; |
904 | 436k | MVMint32 finish_line = doms[b->rpo_idx]; |
905 | 4.43M | while (runner != finish_line) { |
906 | 4.00M | add_to_frontier_set(tc, g, rpo[runner], b); |
907 | 4.00M | runner = doms[runner]; |
908 | 4.00M | } |
909 | 436k | } |
910 | 91.6k | } |
911 | 495k | b = b->linear_next; |
912 | 495k | } |
913 | 21.1k | } |
914 | | |
915 | | /* Per-local SSA info. */ |
916 | | typedef struct { |
917 | | /* Nodes that assign to the variable. */ |
918 | | MVMSpeshBB **ass_nodes; |
919 | | MVMuint16 num_ass_nodes; |
920 | | |
921 | | /* Count of processed assignments aka. C(V). */ |
922 | | MVMint32 count; |
923 | | |
924 | | /* Stack of integers aka. S(V). */ |
925 | | MVMint32 *stack; |
926 | | MVMint32 stack_top; |
927 | | MVMint32 stack_alloc; |
928 | | } SSAVarInfo; |
929 | | |
930 | | /* Creates an SSAVarInfo for each local, initializing it with a list of nodes |
931 | | * that assign to the local. */ |
932 | 21.1k | static SSAVarInfo * initialize_ssa_var_info(MVMThreadContext *tc, MVMSpeshGraph *g) { |
933 | 21.1k | SSAVarInfo *var_info = MVM_calloc(g->num_locals, sizeof(SSAVarInfo)); |
934 | 21.1k | MVMint32 i; |
935 | 21.1k | |
936 | 21.1k | /* Visit all instructions, looking for local writes. */ |
937 | 21.1k | MVMSpeshBB *bb = g->entry; |
938 | 516k | while (bb) { |
939 | 495k | MVMSpeshIns *ins = bb->first_ins; |
940 | 1.92M | while (ins) { |
941 | 4.32M | for (i = 0; i < ins->info->num_operands; i++) { |
942 | 2.90M | if ((ins->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { |
943 | 1.01M | MVMuint16 written = ins->operands[i].reg.orig; |
944 | 1.01M | MVMint32 found = 0; |
945 | 1.01M | MVMint32 j; |
946 | 6.32M | for (j = 0; j < var_info[written].num_ass_nodes; j++) |
947 | 5.48M | if (var_info[written].ass_nodes[j] == bb) { |
948 | 171k | found = 1; |
949 | 171k | break; |
950 | 171k | } |
951 | 1.01M | if (!found) { |
952 | 841k | if (var_info[written].num_ass_nodes % 8 == 0) { |
953 | 286k | MVMint32 new_size = var_info[written].num_ass_nodes + 8; |
954 | 286k | var_info[written].ass_nodes = MVM_realloc( |
955 | 286k | var_info[written].ass_nodes, |
956 | 286k | new_size * sizeof(MVMSpeshBB *)); |
957 | 286k | } |
958 | 841k | var_info[written].ass_nodes[var_info[written].num_ass_nodes] = bb; |
959 | 841k | var_info[written].num_ass_nodes++; |
960 | 841k | } |
961 | 1.01M | } |
962 | 2.90M | } |
963 | 1.42M | ins = ins->next; |
964 | 1.42M | } |
965 | 495k | bb = bb->linear_next; |
966 | 495k | } |
967 | 21.1k | |
968 | 21.1k | /* Set stack top to -1 sentinel for all nodes, and count = 1 (as we may |
969 | 21.1k | * read the default value of a register). */ |
970 | 283k | for (i = 0; i < g->num_locals; i++) { |
971 | 261k | var_info[i].count = 1; |
972 | 261k | var_info[i].stack_top = -1; |
973 | 261k | } |
974 | 21.1k | |
975 | 21.1k | return var_info; |
976 | 21.1k | } |
977 | | |
978 | 980k | MVMOpInfo *get_phi(MVMThreadContext *tc, MVMSpeshGraph *g, MVMuint32 nrargs) { |
979 | 980k | MVMOpInfo *result = NULL; |
980 | 980k | |
981 | 980k | /* Check number of args to phi isn't huge. */ |
982 | 980k | if (nrargs > 0xFFFF) |
983 | 0 | MVM_panic(1, "Spesh: SSA calculation failed; cannot allocate enormous PHI node"); |
984 | 980k | |
985 | 980k | /* Up to 64 args, almost every number is represented, but after that |
986 | 980k | * we have a sparse array through which we must search. */ |
987 | 980k | if (nrargs - 2 < MVMPhiNodeCacheSparseBegin) { |
988 | 872k | result = &g->phi_infos[nrargs - 2]; |
989 | 108k | } else { |
990 | 108k | MVMint32 cache_idx; |
991 | 108k | |
992 | 381k | for (cache_idx = MVMPhiNodeCacheSparseBegin; !result && cache_idx < MVMPhiNodeCacheSize; cache_idx++) { |
993 | 273k | if (g->phi_infos[cache_idx].opcode == MVM_SSA_PHI) { |
994 | 272k | if (g->phi_infos[cache_idx].num_operands == nrargs) { |
995 | 106k | result = &g->phi_infos[cache_idx]; |
996 | 106k | } |
997 | 1.60k | } else { |
998 | 1.60k | result = &g->phi_infos[cache_idx]; |
999 | 1.60k | } |
1000 | 273k | } |
1001 | 108k | } |
1002 | 980k | |
1003 | 980k | if (result == NULL) { |
1004 | 4 | result = MVM_spesh_alloc(tc, g, sizeof(MVMOpInfo)); |
1005 | 4 | result->opcode = 0; |
1006 | 4 | } |
1007 | 980k | |
1008 | 980k | if (result->opcode != MVM_SSA_PHI) { |
1009 | 25.5k | result->opcode = MVM_SSA_PHI; |
1010 | 25.5k | result->name = "PHI"; |
1011 | 25.5k | result->num_operands = nrargs; |
1012 | 25.5k | } |
1013 | 980k | |
1014 | 980k | return result; |
1015 | 980k | } |
1016 | | |
1017 | | /* Inserts SSA phi functions at the required places in the graph. */ |
1018 | 967k | static void place_phi(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *bb, MVMint32 n, MVMuint16 var) { |
1019 | 967k | MVMint32 i; |
1020 | 967k | MVMOpInfo *phi_op = get_phi(tc, g, n + 1); |
1021 | 967k | MVMSpeshIns *ins = MVM_spesh_alloc(tc, g, sizeof(MVMSpeshIns)); |
1022 | 967k | ins->info = phi_op; |
1023 | 967k | ins->operands = MVM_spesh_alloc(tc, g, phi_op->num_operands * sizeof(MVMSpeshOperand)); |
1024 | 19.6M | for (i = 0; i < phi_op->num_operands; i++) |
1025 | 18.6M | ins->operands[i].reg.orig = var; |
1026 | 967k | ins->next = bb->first_ins; |
1027 | 967k | bb->first_ins->prev = ins; |
1028 | 967k | bb->first_ins = ins; |
1029 | 967k | } |
1030 | 21.1k | static void insert_phi_functions(MVMThreadContext *tc, MVMSpeshGraph *g, SSAVarInfo *var_info) { |
1031 | 21.1k | MVMint32 *has_already = MVM_calloc(g->num_bbs, sizeof(MVMint32)); |
1032 | 21.1k | MVMint32 *work = MVM_calloc(g->num_bbs, sizeof(MVMint32)); |
1033 | 21.1k | MVMSpeshBB **worklist = MVM_calloc(g->num_bbs, sizeof(MVMSpeshBB *)); |
1034 | 21.1k | MVMint32 worklist_top = 0; |
1035 | 21.1k | MVMint32 iter_count = 0; |
1036 | 21.1k | |
1037 | 21.1k | /* Go over all locals. */ |
1038 | 21.1k | MVMint32 var, i, j, found; |
1039 | 283k | for (var = 0; var < g->num_locals; var++) { |
1040 | 261k | /* Move to next iteration. */ |
1041 | 261k | iter_count++; |
1042 | 261k | |
1043 | 261k | /* Add blocks assigning to this variable to the worklist. */ |
1044 | 1.10M | for (i = 0; i < var_info[var].num_ass_nodes; i++) { |
1045 | 838k | MVMSpeshBB *bb = var_info[var].ass_nodes[i]; |
1046 | 838k | work[bb->idx] = iter_count; |
1047 | 838k | worklist[worklist_top++] = bb; /* Algo unions, but ass_nodes unique. */ |
1048 | 838k | } |
1049 | 261k | |
1050 | 261k | /* Process the worklist. */ |
1051 | 2.00M | while (worklist_top) { |
1052 | 1.74M | MVMSpeshBB *x = worklist[--worklist_top]; |
1053 | 5.07M | for (i = 0; i < x->num_df; i++) { |
1054 | 3.33M | MVMSpeshBB *y = x->df[i]; |
1055 | 3.33M | if (has_already[y->idx] < iter_count) { |
1056 | 967k | /* Place phi function, and mark we have. */ |
1057 | 967k | place_phi(tc, g, y, y->num_pred, var); |
1058 | 967k | has_already[y->idx] = iter_count; |
1059 | 967k | |
1060 | 967k | /* Add this block to worklist if needed. */ |
1061 | 967k | if (work[y->idx] < iter_count) { |
1062 | 904k | work[y->idx] = iter_count; |
1063 | 904k | found = 0; |
1064 | 6.67M | for (j = 0; j < worklist_top; j++) |
1065 | 5.77M | if (worklist[j] == y) { |
1066 | 0 | found = 1; |
1067 | 0 | break; |
1068 | 0 | } |
1069 | 904k | if (!found) |
1070 | 904k | worklist[worklist_top++] = y; |
1071 | 904k | } |
1072 | 967k | } |
1073 | 3.33M | } |
1074 | 1.74M | } |
1075 | 261k | } |
1076 | 21.1k | |
1077 | 21.1k | MVM_free(has_already); |
1078 | 21.1k | MVM_free(work); |
1079 | 21.1k | MVM_free(worklist); |
1080 | 21.1k | } |
1081 | | |
1082 | | /* Renames the local variables such that we end up with SSA form. */ |
1083 | 814k | static MVMint32 which_pred(MVMThreadContext *tc, MVMSpeshGraph *g, MVMSpeshBB *y, MVMSpeshBB *x) { |
1084 | 814k | MVMint32 i; |
1085 | 17.8M | for (i = 0; i < y->num_pred; i++) |
1086 | 17.8M | if (y->pred[i] == x) |
1087 | 814k | return i; |
1088 | 0 | MVM_spesh_graph_destroy(tc, g); |
1089 | 0 | MVM_oops(tc, "Spesh: which_pred failed to find x"); |
1090 | 0 | } |
1091 | 492k | static void rename_locals(MVMThreadContext *tc, MVMSpeshGraph *g, SSAVarInfo *var_info, MVMSpeshBB *x) { |
1092 | 492k | MVMint32 i; |
1093 | 492k | |
1094 | 492k | /* Visit instructions and do renames in normal (non-phi) instructions. */ |
1095 | 492k | MVMSpeshIns *a = x->first_ins; |
1096 | 2.87M | while (a) { |
1097 | 2.38M | /* Rename reads, provided it's not a PHI. */ |
1098 | 2.38M | MVMint32 is_phi = a->info->opcode == MVM_SSA_PHI; |
1099 | 2.38M | if (!is_phi) { |
1100 | 4.30M | for (i = 0; i < a->info->num_operands; i++) { |
1101 | 2.88M | if ((a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_read_reg) { |
1102 | 1.10M | MVMuint16 orig = a->operands[i].reg.orig; |
1103 | 1.10M | MVMint32 st = var_info[orig].stack_top; |
1104 | 1.10M | if (st >= 0) |
1105 | 1.10M | a->operands[i].reg.i = var_info[orig].stack[st]; |
1106 | 1.10M | else |
1107 | 0 | a->operands[i].reg.i = 0; |
1108 | 1.10M | } |
1109 | 2.88M | } |
1110 | 1.42M | } |
1111 | 2.38M | |
1112 | 2.38M | /* Rename writes. */ |
1113 | 5.27M | for (i = 0; i < a->info->num_operands; i++) { |
1114 | 3.85M | if (is_phi || (a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { |
1115 | 1.97M | MVMuint16 orig = a->operands[i].reg.orig; |
1116 | 1.97M | MVMint32 reg_i = var_info[orig].count; |
1117 | 1.97M | a->operands[i].reg.i = reg_i; |
1118 | 1.97M | if (var_info[orig].stack_top + 1 >= var_info[orig].stack_alloc) { |
1119 | 268k | if (var_info[orig].stack_alloc) |
1120 | 17.2k | var_info[orig].stack_alloc *= 2; |
1121 | 268k | else |
1122 | 251k | var_info[orig].stack_alloc = 8; |
1123 | 268k | var_info[orig].stack = MVM_realloc(var_info[orig].stack, |
1124 | 268k | var_info[orig].stack_alloc * sizeof(MVMint32)); |
1125 | 268k | } |
1126 | 1.97M | var_info[orig].stack[++var_info[orig].stack_top] = reg_i; |
1127 | 1.97M | var_info[orig].count++; |
1128 | 1.97M | } |
1129 | 3.85M | if (is_phi) |
1130 | 964k | break; |
1131 | 3.85M | } |
1132 | 2.38M | |
1133 | 2.38M | a = a->next; |
1134 | 2.38M | } |
1135 | 492k | |
1136 | 492k | /* Visit successors and update their phi functions. */ |
1137 | 1.30M | for (i = 0; i < x->num_succ; i++) { |
1138 | 814k | MVMSpeshBB *y = x->succ[i]; |
1139 | 814k | MVMint32 j = which_pred(tc, g, y, x); |
1140 | 814k | MVMSpeshIns *p = y->first_ins; |
1141 | 18.4M | while (p && p->info->opcode == MVM_SSA_PHI) { |
1142 | 17.6M | MVMuint16 orig = p->operands[j + 1].reg.orig; |
1143 | 17.6M | MVMint32 st = var_info[orig].stack_top; |
1144 | 17.6M | if (st >= 0) |
1145 | 17.0M | p->operands[j + 1].reg.i = var_info[orig].stack[st]; |
1146 | 17.6M | else |
1147 | 526k | p->operands[j + 1].reg.i = 0; |
1148 | 17.6M | p = p->next; |
1149 | 17.6M | } |
1150 | 814k | } |
1151 | 492k | |
1152 | 492k | /* Rename for all the children in the dominator tree. */ |
1153 | 964k | for (i = 0; i < x->num_children; i++) |
1154 | 471k | rename_locals(tc, g, var_info, x->children[i]); |
1155 | 492k | |
1156 | 492k | /* Go over assignments and pop new variable names. */ |
1157 | 492k | a = x->first_ins; |
1158 | 2.87M | while (a) { |
1159 | 2.38M | MVMint32 is_phi = a->info->opcode == MVM_SSA_PHI; |
1160 | 5.27M | for (i = 0; i < a->info->num_operands; i++) { |
1161 | 3.85M | if (is_phi || (a->info->operands[i] & MVM_operand_rw_mask) == MVM_operand_write_reg) { |
1162 | 1.97M | MVMuint16 orig = a->operands[i].reg.orig; |
1163 | 1.97M | var_info[orig].stack_top--; |
1164 | 1.97M | } |
1165 | 3.85M | if (is_phi) |
1166 | 964k | break; |
1167 | 3.85M | } |
1168 | 2.38M | a = a->next; |
1169 | 2.38M | } |
1170 | 492k | } |
1171 | | |
1172 | | /* Transforms a spesh graph into SSA form. After this, the graph will have all |
1173 | | * register accesses given an SSA "version", and phi instructions inserted as |
1174 | | * needed. */ |
1175 | 21.1k | static void ssa(MVMThreadContext *tc, MVMSpeshGraph *g) { |
1176 | 21.1k | SSAVarInfo *var_info; |
1177 | 21.1k | MVMint32 i, num_locals; |
1178 | 21.1k | |
1179 | 21.1k | /* Compute dominance frontiers. */ |
1180 | 21.1k | MVMSpeshBB **rpo = reverse_postorder(tc, g); |
1181 | 21.1k | MVMint32 *doms = compute_dominators(tc, g, rpo); |
1182 | 21.1k | add_children(tc, g, rpo, doms); |
1183 | 21.1k | add_dominance_frontiers(tc, g, rpo, doms); |
1184 | 21.1k | MVM_free(rpo); |
1185 | 21.1k | MVM_free(doms); |
1186 | 21.1k | |
1187 | 21.1k | /* Initialize per-local data for SSA analysis. */ |
1188 | 21.1k | var_info = initialize_ssa_var_info(tc, g); |
1189 | 21.1k | |
1190 | 21.1k | /* Compute SSA itself. */ |
1191 | 21.1k | insert_phi_functions(tc, g, var_info); |
1192 | 21.1k | rename_locals(tc, g, var_info, g->entry); |
1193 | 21.1k | |
1194 | 21.1k | /* Allocate space for spesh facts for each local; clean up stacks while |
1195 | 21.1k | * we're at it. */ |
1196 | 21.1k | num_locals = g->num_locals; |
1197 | 21.1k | g->facts = MVM_spesh_alloc(tc, g, num_locals * sizeof(MVMSpeshFacts *)); |
1198 | 21.1k | g->fact_counts = MVM_spesh_alloc(tc, g, num_locals * sizeof(MVMuint16)); |
1199 | 282k | for (i = 0; i < num_locals; i++) { |
1200 | 261k | g->fact_counts[i] = var_info[i].count; |
1201 | 261k | g->facts[i] = MVM_spesh_alloc(tc, g, var_info[i].count * sizeof(MVMSpeshFacts)); |
1202 | 261k | if (var_info[i].stack_alloc) { |
1203 | 251k | MVM_free(var_info[i].stack); |
1204 | 251k | MVM_free(var_info[i].ass_nodes); |
1205 | 251k | } |
1206 | 261k | } |
1207 | 21.1k | MVM_free(var_info); |
1208 | 21.1k | } |
1209 | | |
1210 | | /* Takes a static frame and creates a spesh graph for it. */ |
1211 | | MVMSpeshGraph * MVM_spesh_graph_create(MVMThreadContext *tc, MVMStaticFrame *sf, |
1212 | 11.1k | MVMuint32 cfg_only, MVMuint32 insert_object_nulls) { |
1213 | 11.1k | /* Create top-level graph object. */ |
1214 | 11.1k | MVMSpeshGraph *g = MVM_calloc(1, sizeof(MVMSpeshGraph)); |
1215 | 11.1k | g->sf = sf; |
1216 | 11.1k | g->bytecode = sf->body.bytecode; |
1217 | 11.1k | g->bytecode_size = sf->body.bytecode_size; |
1218 | 11.1k | g->handlers = sf->body.handlers; |
1219 | 11.1k | g->num_handlers = sf->body.num_handlers; |
1220 | 11.1k | g->num_locals = sf->body.num_locals; |
1221 | 11.1k | g->num_lexicals = sf->body.num_lexicals; |
1222 | 11.1k | g->phi_infos = MVM_spesh_alloc(tc, g, MVMPhiNodeCacheSize * sizeof(MVMOpInfo)); |
1223 | 11.1k | |
1224 | 11.1k | /* Ensure the frame is validated, since we'll rely on this. */ |
1225 | 11.1k | if (sf->body.instrumentation_level == 0) { |
1226 | 0 | MVM_spesh_graph_destroy(tc, g); |
1227 | 0 | MVM_oops(tc, "Spesh: cannot build CFG from unvalidated frame"); |
1228 | 0 | } |
1229 | 11.1k | |
1230 | 11.1k | /* Build the CFG out of the static frame, and transform it to SSA. */ |
1231 | 11.1k | build_cfg(tc, g, sf, NULL, 0); |
1232 | 11.1k | if (insert_object_nulls) |
1233 | 11.1k | insert_object_null_instructions(tc, g); |
1234 | 11.1k | if (!cfg_only) { |
1235 | 11.1k | MVM_spesh_eliminate_dead_bbs(tc, g, 0); |
1236 | 11.1k | add_predecessors(tc, g); |
1237 | 11.1k | ssa(tc, g); |
1238 | 11.1k | } |
1239 | 11.1k | |
1240 | 11.1k | /* Hand back the completed graph. */ |
1241 | 11.1k | return g; |
1242 | 11.1k | } |
1243 | | |
1244 | | /* Takes a static frame and creates a spesh graph for it. */ |
1245 | | MVMSpeshGraph * MVM_spesh_graph_create_from_cand(MVMThreadContext *tc, MVMStaticFrame *sf, |
1246 | 10.0k | MVMSpeshCandidate *cand, MVMuint32 cfg_only) { |
1247 | 10.0k | /* Create top-level graph object. */ |
1248 | 10.0k | MVMSpeshGraph *g = MVM_calloc(1, sizeof(MVMSpeshGraph)); |
1249 | 10.0k | g->sf = sf; |
1250 | 10.0k | g->bytecode = cand->bytecode; |
1251 | 10.0k | g->bytecode_size = cand->bytecode_size; |
1252 | 10.0k | g->handlers = cand->handlers; |
1253 | 10.0k | g->num_handlers = cand->num_handlers; |
1254 | 10.0k | g->num_locals = cand->num_locals; |
1255 | 10.0k | g->num_lexicals = cand->num_lexicals; |
1256 | 10.0k | g->inlines = cand->inlines; |
1257 | 10.0k | g->num_inlines = cand->num_inlines; |
1258 | 10.0k | g->deopt_addrs = cand->deopts; |
1259 | 10.0k | g->num_deopt_addrs = cand->num_deopts; |
1260 | 10.0k | g->alloc_deopt_addrs = cand->num_deopts; |
1261 | 10.0k | g->deopt_named_used_bit_field = cand->deopt_named_used_bit_field; |
1262 | 10.0k | g->local_types = cand->local_types; |
1263 | 10.0k | g->lexical_types = cand->lexical_types; |
1264 | 10.0k | g->num_spesh_slots = cand->num_spesh_slots; |
1265 | 10.0k | g->alloc_spesh_slots = cand->num_spesh_slots; |
1266 | 10.0k | g->phi_infos = MVM_spesh_alloc(tc, g, MVMPhiNodeCacheSize * sizeof(MVMOpInfo)); |
1267 | 10.0k | g->cand = cand; |
1268 | 10.0k | |
1269 | 10.0k | g->spesh_slots = MVM_malloc(g->alloc_spesh_slots * sizeof(MVMCollectable *)); |
1270 | 10.0k | |
1271 | 10.0k | memcpy(g->spesh_slots, cand->spesh_slots, sizeof(MVMCollectable *) * g->num_spesh_slots); |
1272 | 10.0k | |
1273 | 10.0k | /* Ensure the frame is validated, since we'll rely on this. */ |
1274 | 10.0k | if (sf->body.instrumentation_level == 0) { |
1275 | 0 | MVM_spesh_graph_destroy(tc, g); |
1276 | 0 | MVM_oops(tc, "Spesh: cannot build CFG from unvalidated frame"); |
1277 | 0 | } |
1278 | 10.0k | |
1279 | 10.0k | /* Build the CFG out of the static frame, and transform it to SSA. */ |
1280 | 10.0k | build_cfg(tc, g, sf, cand->deopts, cand->num_deopts); |
1281 | 10.0k | if (!cfg_only) { |
1282 | 10.0k | MVM_spesh_eliminate_dead_bbs(tc, g, 0); |
1283 | 10.0k | add_predecessors(tc, g); |
1284 | 10.0k | ssa(tc, g); |
1285 | 10.0k | } |
1286 | 10.0k | |
1287 | 10.0k | /* Hand back the completed graph. */ |
1288 | 10.0k | return g; |
1289 | 10.0k | } |
1290 | | |
1291 | | /* Recomputes the dominance tree, after modifications to the CFG. */ |
1292 | 11.1k | void MVM_spesh_graph_recompute_dominance(MVMThreadContext *tc, MVMSpeshGraph *g) { |
1293 | 11.1k | MVMSpeshBB **rpo; |
1294 | 11.1k | MVMint32 *doms; |
1295 | 11.1k | |
1296 | 11.1k | /* First, clear away all existing dominance tree information; we also toss |
1297 | 11.1k | * out all of the predecessors, in case they got out of sync (should try |
1298 | 11.1k | * and fix things up to not need this in the future). */ |
1299 | 11.1k | MVMSpeshBB *cur_bb = g->entry; |
1300 | 333k | while (cur_bb) { |
1301 | 322k | cur_bb->children = NULL; |
1302 | 322k | cur_bb->num_children = 0; |
1303 | 322k | cur_bb->pred = NULL; |
1304 | 322k | cur_bb->num_pred = 0; |
1305 | 322k | cur_bb = cur_bb->linear_next; |
1306 | 322k | } |
1307 | 11.1k | |
1308 | 11.1k | /* Now form the new dominance tree. */ |
1309 | 11.1k | add_predecessors(tc, g); |
1310 | 11.1k | rpo = reverse_postorder(tc, g); |
1311 | 11.1k | doms = compute_dominators(tc, g, rpo); |
1312 | 11.1k | add_children(tc, g, rpo, doms); |
1313 | 11.1k | MVM_free(rpo); |
1314 | 11.1k | MVM_free(doms); |
1315 | 11.1k | } |
1316 | | |
1317 | | /* Marks GCables held in a spesh graph. */ |
1318 | 0 | void MVM_spesh_graph_mark(MVMThreadContext *tc, MVMSpeshGraph *g, MVMGCWorklist *worklist) { |
1319 | 0 | MVMuint16 i, j, num_locals, num_facts, *local_types; |
1320 | 0 |
|
1321 | 0 | /* Mark static frame. */ |
1322 | 0 | MVM_gc_worklist_add(tc, worklist, &g->sf); |
1323 | 0 |
|
1324 | 0 | /* Mark facts. */ |
1325 | 0 | num_locals = g->num_locals; |
1326 | 0 | local_types = g->local_types ? g->local_types : g->sf->body.local_types; |
1327 | 0 | for (i = 0; i < num_locals; i++) { |
1328 | 0 | num_facts = g->fact_counts[i]; |
1329 | 0 | for (j = 0; j < num_facts; j++) { |
1330 | 0 | MVMint32 flags = g->facts[i][j].flags; |
1331 | 0 | if (flags & MVM_SPESH_FACT_KNOWN_TYPE) |
1332 | 0 | MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].type)); |
1333 | 0 | if (flags & MVM_SPESH_FACT_KNOWN_DECONT_TYPE) |
1334 | 0 | MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].decont_type)); |
1335 | 0 | if (flags & MVM_SPESH_FACT_KNOWN_VALUE) { |
1336 | 0 | if (local_types[i] == MVM_reg_obj) |
1337 | 0 | MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].value.o)); |
1338 | 0 | else if (local_types[i] == MVM_reg_str) |
1339 | 0 | MVM_gc_worklist_add(tc, worklist, &(g->facts[i][j].value.s)); |
1340 | 0 | } |
1341 | 0 | } |
1342 | 0 | } |
1343 | 0 | } |
1344 | | |
1345 | | /* Destroys a spesh graph, deallocating all its associated memory. */ |
1346 | 20.8k | void MVM_spesh_graph_destroy(MVMThreadContext *tc, MVMSpeshGraph *g) { |
1347 | 20.8k | /* Free all of the allocated node memory. */ |
1348 | 20.8k | MVM_region_destroy(tc, &g->region_alloc); |
1349 | 20.8k | /* Free handlers array, if different from the static frame. */ |
1350 | 20.8k | if (!g->cand && g->handlers && g->handlers != g->sf->body.handlers) |
1351 | 3.22k | MVM_free(g->handlers); |
1352 | 20.8k | |
1353 | 20.8k | /* Free the graph itself. */ |
1354 | 20.8k | MVM_free(g); |
1355 | 20.8k | } |