/home/travis/build/MoarVM/MoarVM/src/spesh/deopt.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "moar.h" |
2 | | |
3 | | /* In some cases, we may have specialized bytecode "on the stack" and need to |
4 | | * back out of it, because some assumption it made has been invalidated. This |
5 | | * file contains implementations of those various forms of de-opt. */ |
6 | | |
7 | | #define MVM_LOG_DEOPTS 0 |
8 | | |
9 | | /* Resets the dynamic-lexical cache held on frame f by setting both its cached name |
10 | | * and its cached register pointer to NULL. Uninlining can invalidate what the dynlex cache points to, so MVM_spesh_deopt_one, MVM_spesh_deopt_one_direct and MVM_spesh_deopt_all call this on each frame they process. The tc argument is not used. */ |
11 | 190k | MVM_STATIC_INLINE void clear_dynlex_cache(MVMThreadContext *tc, MVMFrame *f) { |
12 | 190k | f->dynlex_cache_name = NULL; |
13 | 190k | f->dynlex_cache_reg = NULL; |
14 | 190k | } |
15 | | |
16 | | /* If we have to deopt inside of a frame containing inlines, and we're in an inlined frame at the point we hit deopt, we need to undo the inlining |
17 | | * by switching all levels of inlined frame out for a bunch of frames that are running the de-optimized code. We may, of course, be in the original, non-inline, bit of the code - in which case only f's return address (callee given) or the interpreter position (callee NULL) is moved. |
18 | | * Parameters: f is the frame holding the inlinings; cand is its spesh candidate, whose inlines table is scanned; offset is the position in the specialized bytecode, tested against each inline's [start, end) range; deopt_offset is the position in the unspecialized bytecode at which execution resumes; |
19 | | * callee is the frame that f called when we come from deopt_all (its caller link is repointed at the frame recreated for the first matching inline), or NULL when we come from deopt_one (the interpreter is redirected to that frame instead). |
20 | | * The last_res_reg, last_res_type and last_return_deopt_idx locals are deliberately left uninitialized: they are only read while last_uninlined is non-NULL, i.e. after a matching inline has assigned them. */ |
21 | | static void uninline(MVMThreadContext *tc, MVMFrame *f, MVMSpeshCandidate *cand, |
22 | 67.9k | MVMint32 offset, MVMint32 deopt_offset, MVMFrame *callee) { |
23 | 67.9k | MVMFrame *last_uninlined = NULL; |
24 | 67.9k | MVMuint16 last_res_reg; |
25 | 67.9k | MVMReturnType last_res_type; |
26 | 67.9k | MVMuint32 last_return_deopt_idx; |
27 | 67.9k | MVMint32 i; |
28 | 166k | for (i = 0; i < cand->num_inlines; i++) { |
29 | 98.8k | if (offset >= cand->inlines[i].start && offset < cand->inlines[i].end) { |
30 | 1.66k | /* Create the frame that will run this inlinee's own code. The MVMROOT wrappers keep f, callee, last_uninlined and usf rooted across the allocation (presumably because it may trigger GC - confirm against MVM_frame_create_for_deopt). */ |
31 | 1.66k | MVMCode *ucode = cand->inlines[i].code; |
32 | 1.66k | MVMStaticFrame *usf = ucode->body.sf; |
33 | 1.66k | MVMFrame *uf; |
34 | 1.66k | MVMROOT(tc, f, { |
35 | 1.66k | MVMROOT(tc, callee, { |
36 | 1.66k | MVMROOT(tc, last_uninlined, { |
37 | 1.66k | MVMROOT(tc, usf, { |
38 | 1.66k | uf = MVM_frame_create_for_deopt(tc, usf, ucode); |
39 | 1.66k | }); |
40 | 1.66k | }); |
41 | 1.66k | }); |
42 | 1.66k | }); |
43 | 1.66k | #if MVM_LOG_DEOPTS |
44 | | fprintf(stderr, "Recreated frame '%s' (cuid '%s')\n", |
45 | | MVM_string_utf8_encode_C_string(tc, usf->body.name), |
46 | | MVM_string_utf8_encode_C_string(tc, usf->body.cuuid)); |
47 | | #endif |
48 | 1.66k | |
49 | 1.66k | /* Copy the locals and lexicals into place: the inlinee's slots sit inside f->work and f->env, starting at this inline's locals_start and lexicals_start. */ |
50 | 1.66k | if (usf->body.num_locals) |
51 | 1.66k | memcpy(uf->work, f->work + cand->inlines[i].locals_start, |
52 | 1.66k | usf->body.num_locals * sizeof(MVMRegister)); |
53 | 1.66k | if (usf->body.num_lexicals) |
54 | 0 | memcpy(uf->env, f->env + cand->inlines[i].lexicals_start, |
55 | 0 | usf->body.num_lexicals * sizeof(MVMRegister)); |
56 | 1.66k | |
57 | 1.66k | /* Did we already uninline a frame on an earlier iteration of this loop? */ |
58 | 1.66k | if (last_uninlined) { |
59 | 0 | /* Yes; multi-level un-inline. Switch it back to deopt'd |
60 | 0 | * code. */ |
61 | 0 | uf->effective_bytecode = uf->static_info->body.bytecode; |
62 | 0 | uf->effective_handlers = uf->static_info->body.handlers; |
63 | 0 | uf->effective_spesh_slots = NULL; |
64 | 0 | uf->spesh_cand = NULL; |
65 | 0 |
|
66 | 0 | /* Set up the return location: the offset in uf's original bytecode recorded under the previous inline's return_deopt_idx. */ |
67 | 0 | uf->return_address = uf->static_info->body.bytecode + |
68 | 0 | cand->deopts[2 * last_return_deopt_idx]; |
69 | 0 |
|
70 | 0 | /* Set result type and register: the previously uninlined frame returns its result into uf's own registers. */ |
71 | 0 | uf->return_type = last_res_type; |
72 | 0 | if (last_res_type == MVM_RETURN_VOID) |
73 | 0 | uf->return_value = NULL; |
74 | 0 | else |
75 | 0 | uf->return_value = uf->work + last_res_reg; |
76 | 0 |
|
77 | 0 | /* Set up last uninlined's caller to us. */ |
78 | 0 | MVM_ASSIGN_REF(tc, &(last_uninlined->header), last_uninlined->caller, uf); |
79 | 0 | } |
80 | 1.66k | else { |
81 | 1.66k | /* First uninlined frame. Are we in the middle of the call |
82 | 1.66k | * stack (and thus in deopt_all)? */ |
83 | 1.66k | if (callee) { |
84 | 1.66k | /* Tweak the callee's caller to the uninlined frame, not |
85 | 1.66k | * the frame holding the inlinings. */ |
86 | 1.66k | MVM_ASSIGN_REF(tc, &(callee->header), callee->caller, uf); |
87 | 1.66k | |
88 | 1.66k | /* Copy over the return location (uf was created for deopt, so its effective_bytecode is presumably already the original bytecode - confirm). */ |
89 | 1.66k | uf->return_address = uf->effective_bytecode + deopt_offset; |
90 | 1.66k | |
91 | 1.66k | /* Set result type and register, rebasing f's return register index by this inline's locals_start so that it indexes uf->work. */ |
92 | 1.66k | uf->return_type = f->return_type; |
93 | 1.66k | if (uf->return_type == MVM_RETURN_VOID) { |
94 | 0 | uf->return_value = NULL; |
95 | 0 | } |
96 | 1.66k | else { |
97 | 1.66k | MVMuint16 orig_reg = (MVMuint16)(f->return_value - f->work); |
98 | 1.66k | MVMuint16 ret_reg = orig_reg - cand->inlines[i].locals_start; |
99 | 1.66k | uf->return_value = uf->work + ret_reg; |
100 | 1.66k | } |
101 | 1.66k | } |
102 | 2 | else { |
103 | 2 | /* No, it's the deopt_one case, so this is where we'll point |
104 | 2 | * the interpreter. */ |
105 | 2 | tc->cur_frame = uf; |
106 | 2 | tc->current_frame_nr = uf->sequence_nr; |
107 | 2 | *(tc->interp_cur_op) = uf->effective_bytecode + deopt_offset; |
108 | 2 | *(tc->interp_bytecode_start) = uf->effective_bytecode; |
109 | 2 | *(tc->interp_reg_base) = uf->work; |
110 | 2 | *(tc->interp_cu) = usf->body.cu; |
111 | 2 | } |
112 | 1.66k | } |
113 | 1.66k | |
114 | 1.66k | /* Update tracking variables for last uninline. Note that we know |
115 | 1.66k | * an inline ends with a goto, which is how we're able to find a |
116 | 1.66k | * return address offset. */ |
117 | 1.66k | last_uninlined = uf; |
118 | 1.66k | last_res_reg = cand->inlines[i].res_reg; |
119 | 1.66k | last_res_type = cand->inlines[i].res_type; |
120 | 1.66k | last_return_deopt_idx = cand->inlines[i].return_deopt_idx; |
121 | 1.66k | } |
122 | 98.8k | } |
123 | 67.9k | if (last_uninlined) { |
124 | 1.66k | /* Set return address, which we need to resolve to the deopt'd one. */ |
125 | 1.66k | f->return_address = f->static_info->body.bytecode + |
126 | 1.66k | cand->deopts[2 * last_return_deopt_idx]; |
127 | 1.66k | |
128 | 1.66k | /* Set result type and register: the last recreated frame returns its result into f's registers. */ |
129 | 1.66k | f->return_type = last_res_type; |
130 | 1.66k | if (last_res_type == MVM_RETURN_VOID) |
131 | 0 | f->return_value = NULL; |
132 | 1.66k | else |
133 | 1.66k | f->return_value = f->work + last_res_reg; |
134 | 1.66k | |
135 | 1.66k | /* Set up inliner as the caller, given we now have a direct inline. */ |
136 | 1.66k | MVM_ASSIGN_REF(tc, &(last_uninlined->header), last_uninlined->caller, f); |
137 | 1.66k | } |
138 | 66.3k | else { |
139 | 66.3k | /* Weren't in an inline after all. What kind of deopt? */ |
140 | 66.3k | if (callee) { |
141 | 9.49k | /* Deopt all. Move return address. */ |
142 | 9.49k | f->return_address = f->effective_bytecode + deopt_offset; |
143 | 9.49k | } |
144 | 56.8k | else { |
145 | 56.8k | /* Deopt one. Move interpreter. */ |
146 | 56.8k | *(tc->interp_cur_op) = f->static_info->body.bytecode + deopt_offset; |
147 | 56.8k | *(tc->interp_bytecode_start) = f->static_info->body.bytecode; |
148 | 56.8k | } |
149 | 66.3k | } |
150 | 67.9k | } |
151 | | /* Looks up deopt_offset (a position in f's specialized bytecode) in f->spesh_cand->deopts and returns the value paired with it. The table is scanned as num_deopts pairs: the odd slot is compared against deopt_offset and the even slot is the value returned, which callers use as the resume position in the original bytecode. Calls MVM_oops if no pair matches; note the message names tc->cur_frame rather than f. */ |
152 | 7.27k | static MVMint32 find_deopt_target(MVMThreadContext *tc, MVMFrame *f, MVMint32 deopt_offset) { |
153 | 7.27k | MVMint32 i; |
154 | 57.3k | for (i = 0; i < f->spesh_cand->num_deopts * 2; i += 2) { |
155 | 57.3k | if (f->spesh_cand->deopts[i + 1] == deopt_offset) { |
156 | 7.27k | return f->spesh_cand->deopts[i]; |
157 | 7.27k | } |
158 | 57.3k | } |
159 | 0 | MVM_oops(tc, "find_deopt_target failed for %s (%s)", |
160 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
161 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
162 | 0 | } |
163 | | /* Shared worker for the deopt_one entry points: switches frame f from specialized back to its original bytecode. deopt_offset is the position in the specialized bytecode and deopt_target the corresponding position in the original bytecode. If the candidate has an inlines table, f is forced to the heap and uninline() recreates any inlined frames and repositions the interpreter; otherwise the interpreter is pointed straight at deopt_target. In both cases f's effective bytecode and handlers are reset to the static frame's, and its spesh_cand and effective_spesh_slots are cleared. */ |
164 | 70.3k | static void deopt_frame(MVMThreadContext *tc, MVMFrame *f, MVMint32 deopt_offset, MVMint32 deopt_target) { |
165 | 70.3k | /* Does the candidate carry an inlines table? If so we may be inside an inline; uninline() works out whether we actually are. */ |
166 | 70.3k | MVMSpeshInline *inlines = f->spesh_cand->inlines; |
167 | 70.3k | if (inlines) { |
168 | 56.8k | /* Yes, going to have to re-create the frames; uninline |
169 | 56.8k | * moves the interpreter, so we can just tweak the last |
170 | 56.8k | * frame. For the moment, uninlining creates its frames |
171 | 56.8k | * on the heap, so we'll force the current call stack to |
172 | 56.8k | * the heap to preserve the "no heap -> stack pointers" |
173 | 56.8k | * invariant. Passing NULL as the callee selects uninline's deopt_one behaviour. */ |
174 | 56.8k | f = MVM_frame_force_to_heap(tc, f); |
175 | 56.8k | MVMROOT(tc, f, { |
176 | 56.8k | uninline(tc, f, f->spesh_cand, deopt_offset, deopt_target, NULL); |
177 | 56.8k | }); |
178 | 56.8k | f->effective_bytecode = f->static_info->body.bytecode; |
179 | 56.8k | f->effective_handlers = f->static_info->body.handlers; |
180 | 56.8k | f->effective_spesh_slots = NULL; |
181 | 56.8k | f->spesh_cand = NULL; |
182 | 56.8k | #if MVM_LOG_DEOPTS |
183 | | fprintf(stderr, "Completed deopt_one in '%s' (cuid '%s') with uninlining\n", |
184 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
185 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
186 | | #endif |
187 | 56.8k | } |
188 | 13.5k | else { |
189 | 13.5k | /* No inlining; simple case. Switch back to the original code and point the interpreter at deopt_target within it. */ |
190 | 13.5k | f->effective_bytecode = f->static_info->body.bytecode; |
191 | 13.5k | f->effective_handlers = f->static_info->body.handlers; |
192 | 13.5k | *(tc->interp_cur_op) = f->effective_bytecode + deopt_target; |
193 | 13.5k | *(tc->interp_bytecode_start) = f->effective_bytecode; |
194 | 13.5k | f->effective_spesh_slots = NULL; |
195 | 13.5k | f->spesh_cand = NULL; |
196 | 13.5k | #if MVM_LOG_DEOPTS |
197 | | fprintf(stderr, "Completed deopt_one in '%s' (cuid '%s')\n", |
198 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
199 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
200 | | #endif |
201 | 13.5k | } |
202 | 70.3k | |
203 | 70.3k | } |
204 | | |
205 | | /* De-optimizes the currently executing frame, provided it is specialized and |
206 | | * at a valid de-optimization point. Typically used when a guard fails. Steps: log to the profiler if profiling is enabled, clear the frame's dynlex cache, derive the specialized-bytecode offset from the interpreter's current op, map it to its deopt target with find_deopt_target, and hand both to deopt_frame. Calls MVM_oops if the frame is already running its original bytecode (and, via find_deopt_target, if the offset is not a recorded deopt point). */ |
207 | 7.27k | void MVM_spesh_deopt_one(MVMThreadContext *tc) { |
208 | 7.27k | MVMFrame *f = tc->cur_frame; |
209 | 7.27k | if (tc->instance->profiling) |
210 | 0 | MVM_profiler_log_deopt_one(tc); |
211 | 7.27k | #if MVM_LOG_DEOPTS |
212 | | fprintf(stderr, "deopt_one requested in frame '%s' (cuid '%s')\n", |
213 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
214 | | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
215 | | #endif |
216 | 7.27k | clear_dynlex_cache(tc, f); |
217 | 7.27k | if (f->effective_bytecode != f->static_info->body.bytecode) { |
218 | 7.27k | MVMint32 deopt_offset = *(tc->interp_cur_op) - f->effective_bytecode; |
219 | 7.27k | MVMint32 deopt_target = find_deopt_target(tc, f, deopt_offset); |
220 | 7.27k | deopt_frame(tc, tc->cur_frame, deopt_offset, deopt_target); |
221 | 7.27k | } |
222 | 0 | else { |
223 | 0 | MVM_oops(tc, "deopt_one failed for %s (%s)", |
224 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
225 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
226 | 0 | } |
227 | 7.27k | } |
228 | | |
229 | | /* De-optimizes the current frame by directly specifying the addresses: deopt_offset is the position in the specialized bytecode and deopt_target the position in the original bytecode to resume at, so no deopt table lookup is done here. Logs to the profiler if profiling is enabled, clears the frame's dynlex cache, then delegates to deopt_frame. Calls MVM_oops if the frame is already running its original bytecode. */ |
230 | | void MVM_spesh_deopt_one_direct(MVMThreadContext *tc, MVMint32 deopt_offset, |
231 | 63.1k | MVMint32 deopt_target) { |
232 | 63.1k | MVMFrame *f = tc->cur_frame; |
233 | 63.1k | if (tc->instance->profiling) |
234 | 0 | MVM_profiler_log_deopt_one(tc); |
235 | 63.1k | clear_dynlex_cache(tc, f); |
236 | 63.1k | if (f->effective_bytecode != f->static_info->body.bytecode) { |
237 | 63.1k | deopt_frame(tc, tc->cur_frame, deopt_offset, deopt_target); |
238 | 0 | } else { |
239 | 0 | MVM_oops(tc, "deopt_one_direct failed for %s (%s)", |
240 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.name), |
241 | 0 | MVM_string_utf8_encode_C_string(tc, tc->cur_frame->static_info->body.cuuid)); |
242 | 0 | } |
243 | 63.1k | |
244 | 63.1k | } |
245 | | |
246 | | /* De-optimizes all specialized frames on the call stack. Used when a change |
247 | | * is made that could invalidate all kinds of assumptions all over the place |
248 | | * (such as a mix-in). The current frame is forced to the heap and the walk starts at its caller, so the current frame itself is not switched. Every visited frame has its dynlex cache cleared; a frame is de-optimized only if its effective bytecode differs from its static frame's bytecode and its spesh_log_idx is negative. If no matching deopt entry is found for such a frame, it is left unchanged. */ |
249 | 2.11k | void MVM_spesh_deopt_all(MVMThreadContext *tc) { |
250 | 2.11k | /* Walk frames looking for any callers in specialized bytecode. l trails one frame behind f (it is the frame f called) and is passed to uninline as the callee. */ |
251 | 2.11k | MVMFrame *l = MVM_frame_force_to_heap(tc, tc->cur_frame); |
252 | 2.11k | MVMFrame *f = tc->cur_frame->caller; |
253 | 2.11k | if (tc->instance->profiling) |
254 | 0 | MVM_profiler_log_deopt_all(tc); |
255 | 122k | while (f) { |
256 | 120k | clear_dynlex_cache(tc, f); |
257 | 120k | if (f->effective_bytecode != f->static_info->body.bytecode && f->spesh_log_idx < 0) { |
258 | 20.2k | /* Found one. Is it JITted code? If so, the resume point is found by matching the frame's jit_entry_label against the JIT code's deopt labels. */ |
259 | 20.2k | if (f->spesh_cand->jitcode && f->jit_entry_label) { |
260 | 12.2k | MVMint32 num_deopts = f->spesh_cand->jitcode->num_deopts; |
261 | 12.2k | MVMJitDeopt *deopts = f->spesh_cand->jitcode->deopts; |
262 | 12.2k | void **labels = f->spesh_cand->jitcode->labels; |
263 | 12.2k | MVMint32 i; |
264 | 67.9k | for (i = 0; i < num_deopts; i++) { |
265 | 64.5k | if (labels[deopts[i].label] == f->jit_entry_label) { |
266 | 8.82k | /* Resolve offset and target from the candidate's deopt table: odd slot is the specialized offset, even slot the original-bytecode target. */ |
267 | 8.82k | MVMint32 deopt_idx = deopts[i].idx; |
268 | 8.82k | MVMint32 deopt_offset = f->spesh_cand->deopts[2 * deopt_idx + 1]; |
269 | 8.82k | MVMint32 deopt_target = f->spesh_cand->deopts[2 * deopt_idx]; |
270 | 8.82k | |
271 | 8.82k | #if MVM_LOG_DEOPTS |
272 | | fprintf(stderr, "Found deopt label for JIT (%d) (label %d idx %d)\n", i, |
273 | | deopts[i].label, deopts[i].idx); |
274 | | #endif |
275 | 8.82k | |
276 | 8.82k | /* Switch frame itself back to the original code. */ |
277 | 8.82k | f->effective_bytecode = f->static_info->body.bytecode; |
278 | 8.82k | f->effective_handlers = f->static_info->body.handlers; |
279 | 8.82k | |
280 | 8.82k | /* Re-create any frames needed if we're in an inline; if not, |
281 | 8.82k | * just update return address. */ |
282 | 8.82k | if (f->spesh_cand->inlines) { |
283 | 5.60k | MVMROOT(tc, f, { |
284 | 5.60k | MVMROOT(tc, l, { |
285 | 5.60k | uninline(tc, f, f->spesh_cand, deopt_offset, deopt_target, l); |
286 | 5.60k | }); |
287 | 5.60k | }); |
288 | 5.60k | } |
289 | 3.21k | else { |
290 | 3.21k | f->return_address = f->effective_bytecode + deopt_target; |
291 | 3.21k | } |
292 | 8.82k | |
293 | 8.82k | /* No spesh cand/slots needed now; the JIT entry label is dropped as well. */ |
294 | 8.82k | f->effective_spesh_slots = NULL; |
295 | 8.82k | f->spesh_cand = NULL; |
296 | 8.82k | f->jit_entry_label = NULL; |
297 | 8.82k | |
298 | 8.82k | break; |
299 | 8.82k | } |
300 | 64.5k | } |
301 | 12.2k | #if MVM_LOG_DEOPTS |
302 | | if (i == num_deopts) |
303 | | fprintf(stderr, "JIT: can't find deopt all idx"); |
304 | | #endif |
305 | 12.2k | } |
306 | 20.2k | |
307 | 7.93k | else { |
308 | 7.93k | /* Not JITted; see if we can find the return address in the deopt table. */ |
309 | 7.93k | MVMint32 ret_offset = f->return_address - f->effective_bytecode; |
310 | 7.93k | MVMint32 i; |
311 | 113k | for (i = 0; i < f->spesh_cand->num_deopts * 2; i += 2) { |
312 | 113k | if (f->spesh_cand->deopts[i + 1] == ret_offset) { |
313 | 7.93k | /* Switch frame itself back to the original code. */ |
314 | 7.93k | f->effective_bytecode = f->static_info->body.bytecode; |
315 | 7.93k | f->effective_handlers = f->static_info->body.handlers; |
316 | 7.93k | |
317 | 7.93k | /* Re-create any frames needed if we're in an inline; if not, |
318 | 7.93k | * just update return address. */ |
319 | 7.93k | if (f->spesh_cand->inlines) { |
320 | 5.54k | MVMROOT(tc, f, { |
321 | 5.54k | MVMROOT(tc, l, { |
322 | 5.54k | uninline(tc, f, f->spesh_cand, ret_offset, f->spesh_cand->deopts[i], l); |
323 | 5.54k | }); |
324 | 5.54k | }); |
325 | 5.54k | } |
326 | 2.38k | else { |
327 | 2.38k | f->return_address = f->effective_bytecode + f->spesh_cand->deopts[i]; |
328 | 2.38k | } |
329 | 7.93k | |
330 | 7.93k | /* No spesh cand/slots needed now. */ |
331 | 7.93k | f->effective_spesh_slots = NULL; |
332 | 7.93k | f->spesh_cand = NULL; |
333 | 7.93k | |
334 | 7.93k | break; |
335 | 7.93k | } |
336 | 113k | } |
337 | 7.93k | } |
338 | 20.2k | } |
339 | 120k | l = f; |
340 | 120k | f = f->caller; |
341 | 120k | } |
342 | 2.11k | } |