/home/travis/build/MoarVM/MoarVM/src/jit/linear_scan.c
Line | Count | Source
1 | | #include "moar.h" |
2 | | #include "internal.h" |
3 | | |
4 | | #define __COMMA__ , |
5 | | static MVMint8 available_gpr[] = { |
6 | | MVM_JIT_ARCH_AVAILABLE_GPR(MVM_JIT_REG) |
7 | | }; |
8 | | static MVMint8 available_num[] = { |
9 | | MVM_JIT_ARCH_NUM(MVM_JIT_REG) |
10 | | }; |
11 | | /* bitmap, so make it '|' to combine the shifted register numbers */ |
12 | | #undef __COMMA__ |
13 | | #define __COMMA__ | |
14 | | #define SHIFT(x) (1 << (MVM_JIT_REG(x))) |
15 | | static const MVMBitmap NVR_GPR_BITMAP = MVM_JIT_ARCH_NONVOLATILE_GPR(SHIFT); |
16 | | static const MVMBitmap AVAILABLE_GPR_BITMAP = MVM_JIT_ARCH_AVAILABLE_GPR(SHIFT); |
17 | | #undef SHIFT |
18 | | #undef __COMMA__ |
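
A minimal sketch of the __COMMA__ trick above, using a hypothetical three-register list (ARCH_GPR, REG_* and GPR_BITMAP are stand-ins; the real list comes from the arch header's MVM_JIT_ARCH_AVAILABLE_GPR):

    /* hypothetical stand-in for the arch header's register list */
    #define ARCH_GPR(_) _(RAX) __COMMA__ _(RCX) __COMMA__ _(RDX)
    #define REG_RAX 0
    #define REG_RCX 1
    #define REG_RDX 2
    #define REG(x) REG_ ## x

    #define __COMMA__ ,                    /* ',' expands to an initializer list */
    static MVMint8 gprs[] = { ARCH_GPR(REG) };           /* { 0, 1, 2 } */
    #undef __COMMA__

    #define __COMMA__ |                    /* '|' folds the same list into a bitmap */
    #define SHIFT(x) (1 << REG(x))
    static const MVMuint32 GPR_BITMAP = ARCH_GPR(SHIFT); /* 0b111 */
    #undef SHIFT
    #undef __COMMA__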
19 | | |
20 | | |
21 | | #define MAX_ACTIVE sizeof(available_gpr) |
22 | 0 | #define NYI(x) MVM_oops(tc, #x " not yet implemented")
23 | 2.87M | #define _ASSERT(b, msg) do { if (!(b)) { MVM_panic(1, msg); } } while (0)
24 | | |
25 | | #if MVM_JIT_DEBUG |
26 | | #define _DEBUG(fmt, ...) do { fprintf(stderr, fmt "%s", __VA_ARGS__, "\n"); } while(0) |
27 | | #else |
28 | 19.5M | #define _DEBUG(fmt, ...) do {} while(0) |
29 | | #endif |
30 | | |
31 | | |
32 | | typedef struct { |
33 | | MVMint32 key; |
34 | | MVMint32 idx; |
35 | | } UnionFind; |
36 | | |
37 | | typedef struct ValueRef ValueRef; |
38 | | struct ValueRef { |
39 | | MVMint32 tile_idx; |
40 | | MVMint32 value_idx; |
41 | | ValueRef *next; |
42 | | }; |
43 | | |
44 | | struct Hole { |
45 | | MVMint32 start, end; |
46 | | struct Hole *next; |
47 | | }; |
48 | | |
49 | | typedef struct { |
50 | | /* double-ended queue of value refs */ |
51 | | ValueRef *first, *last; |
52 | | /* order number of first and last refs */ |
53 | | MVMint32 start, end; |
54 | | |
55 | | /* list of holes in ascending order */ |
56 | | struct Hole *holes; |
57 | | |
58 | | /* We can have at most two synthetic tiles, one attached to the first |
59 | | * definition and one to the last use... we could also point directly into |
60 | | * the values array of the tile, but it is not directly necessary */ |
61 | | MVMJitTile *synthetic[2]; |
62 | | |
63 | | MVMint8 register_spec; |
64 | | MVMJitStorageClass reg_cls; |
65 | | MVMint32 reg_num; |
66 | | MVMint8 reg_type; |
67 | | |
68 | | |
69 | | MVMint32 spill_pos; |
70 | | MVMint32 spill_idx; |
71 | | } LiveRange; |
72 | | |
73 | | |
74 | | typedef struct { |
75 | | MVMJitCompiler *compiler; |
76 | | |
77 | | /* Sets of values */ |
78 | | UnionFind *sets; |
79 | | |
80 | | /* single buffer for uses, definitions */ |
81 | | ValueRef *refs; |
82 | | MVMint32 refs_num; |
83 | | |
84 | | /* single buffer for holes */
85 | | struct Hole *holes; |
86 | | MVMint32 holes_top; |
87 | | |
88 | | /* All values ever defined by the register allocator */
89 | | MVM_VECTOR_DECL(LiveRange, values); |
90 | | |
91 | | /* 'Currently' active values */ |
92 | | MVMint32 active_top; |
93 | | MVMint32 active[MAX_ACTIVE]; |
94 | | |
95 | | /* Values still left to do (heap) */ |
96 | | MVM_VECTOR_DECL(MVMint32, worklist); |
97 | | /* Retired values (to be assigned registers) (heap) */ |
98 | | MVM_VECTOR_DECL(MVMint32, retired); |
99 | | /* Spilled values */ |
100 | | MVM_VECTOR_DECL(MVMint32, spilled); |
101 | | |
102 | | |
103 | | /* Register handout ring */ |
104 | | MVMint8 reg_ring[MAX_ACTIVE]; |
105 | | MVMint32 reg_give, reg_take; |
106 | | |
107 | | } RegisterAllocator; |
108 | | |
109 | | |
110 | | /* For first/last ref comparison, the tile indexes are doubled, and the indexes |
111 | | * of synthetics are biased with +1/-1. We use this extra space on the number |
112 | | * line to ensure consistent ordering and expiring behavior for 'synthetic' live |
113 | | * ranges that either start before an instruction (loading a required value) or |
114 | | * end just after one (storing the produced value). Without this, ordering |
115 | | * problems can cause two 'atomic' live ranges to be allocated and expired |
116 | | * before their actual last use */ |
117 | 45.1M | MVM_STATIC_INLINE MVMint32 order_nr(MVMint32 tile_idx) { |
118 | 45.1M | return tile_idx * 2; |
119 | 45.1M | } |
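
A worked example of the doubled number line, assuming a tile at index 3:

    /* order_nr(3) == 6 covers the tile's own definition and uses;
     * a synthetic load inserted before it lives at 5 (== 6 - 1) and a
     * synthetic store inserted after it lives at 7 (== 6 + 1), matching
     * insert_load_before_use / insert_store_after_definition below, so
     * both synthetics sort and expire at distinct, correct positions. */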
120 | | |
121 | | |
122 | 484k | MVM_STATIC_INLINE MVMint32 is_definition(ValueRef *v) { |
123 | 484k | return (v->value_idx == 0); |
124 | 484k | } |
125 | | |
126 | 9.12M | MVM_STATIC_INLINE MVMint32 is_arglist_ref(MVMJitTileList *list, ValueRef *v) { |
127 | 9.12M | return (list->items[v->tile_idx]->op == MVM_JIT_ARGLIST); |
128 | 9.12M | } |
129 | | |
130 | | |
131 | 1.55M | MVM_STATIC_INLINE MVMint32 live_range_is_empty(LiveRange *range) { |
132 | 1.55M | return (range->first == NULL && |
133 | 100k | range->synthetic[0] == NULL && |
134 | 100k | range->synthetic[1] == NULL); |
135 | 1.55M | } |
136 | | |
137 | | /* allocate a new live range value by pointer-bumping */ |
138 | 3.96M | MVMint32 live_range_init(RegisterAllocator *alc) { |
139 | 3.96M | LiveRange *range; |
140 | 3.96M | MVMint32 idx = alc->values_num++; |
141 | 3.96M | MVM_VECTOR_ENSURE_SIZE(alc->values, idx); |
142 | 3.96M | alc->values[idx].spill_idx = INT32_MAX; |
143 | 3.96M | alc->values[idx].start = INT32_MAX; |
144 | 3.96M | return idx; |
145 | 3.96M | } |
146 | | |
147 | | |
148 | | /* append ref to end of queue */ |
149 | 8.08M | static void live_range_add_ref(RegisterAllocator *alc, LiveRange *range, MVMint32 tile_idx, MVMint32 value_idx) { |
150 | 8.08M | ValueRef *ref = alc->refs + alc->refs_num++; |
151 | 8.08M | |
152 | 8.08M | ref->tile_idx = tile_idx; |
153 | 8.08M | ref->value_idx = value_idx; |
154 | 8.08M | |
155 | 8.08M | if (range->first == NULL) { |
156 | 3.47M | range->first = ref; |
157 | 3.47M | } |
158 | 8.08M | if (range->last != NULL) { |
159 | 4.61M | range->last->next = ref; |
160 | 4.61M | } |
161 | 8.08M | range->last = ref; |
162 | 8.08M | ref->next = NULL; |
163 | 8.08M | |
164 | 8.08M | range->start = MIN(order_nr(tile_idx), range->start); |
165 | 8.08M | range->end = MAX(order_nr(tile_idx), range->end); |
166 | 8.08M | } |
167 | | |
168 | | |
169 | | /* merge value ref sets */ |
170 | 100k | static void live_range_merge(LiveRange *a, LiveRange *b) { |
171 | 100k | ValueRef *head = NULL, *tail = NULL; |
172 | 100k | MVMint32 i; |
173 | 100k | _DEBUG("Merging live ranges (%d-%d) and (%d-%d)", |
174 | 100k | (a)->start, (a)->end, (b)->start, (b)->end); |
175 | 100k | if (a->start <= b->start) { |
176 | 100k | head = a->first; |
177 | 100k | a->first = a->first->next; |
178 | 0 | } else { |
179 | 0 | head = b->first; |
180 | 0 | b->first = b->first->next; |
181 | 0 | } |
182 | 100k | tail = head; |
183 | 210k | while (a->first != NULL && b->first != NULL) { |
184 | 110k | if (a->first->tile_idx <= b->first->tile_idx) { |
185 | 110k | tail->next = a->first; |
186 | 110k | a->first = a->first->next; |
187 | 0 | } else { |
188 | 0 | tail->next = b->first; |
189 | 0 | b->first = b->first->next; |
190 | 0 | } |
191 | 110k | tail = tail->next; |
192 | 110k | } |
193 | 100k | while (a->first != NULL) { |
194 | 0 | tail->next = a->first; |
195 | 0 | a->first = a->first->next; |
196 | 0 | tail = tail->next; |
197 | 0 | } |
198 | 246k | while (b->first != NULL) { |
199 | 145k | tail->next = b->first; |
200 | 145k | b->first = b->first->next; |
201 | 145k | tail = tail->next; |
202 | 145k | } |
203 | 100k | |
204 | 100k | a->first = head; |
205 | 100k | a->last = tail; |
206 | 100k | |
207 | 300k | for (i = 0; i < 2; i++) { |
208 | 200k | if (b->synthetic[i] == NULL) { |
209 | 200k | continue; |
210 | 200k | } |
211 | 0 | if (a->synthetic[i] != NULL) {
212 | 0 | MVM_panic(1, "Can't merge the same synthetic!");
213 | 0 | }
 | | a->synthetic[i] = b->synthetic[i];
214 | 0 | }
215 | 100k | a->start = MIN(a->start, b->start); |
216 | 100k | a->end = MAX(a->end, b->end); |
217 | 100k | /* deinitialize the live range */ |
218 | 100k | b->start = INT32_MAX; |
219 | 100k | b->end = 0; |
220 | 100k | } |
221 | | |
222 | 105k | static struct Hole * live_range_has_hole(LiveRange *value, MVMint32 order_nr) { |
223 | 105k | struct Hole *h; |
224 | 105k | /* By construction these are in linear ascending order, and never overlap */ |
225 | 124k | for (h = value->holes; h != NULL && h->start <= order_nr; h = h->next) { |
226 | 58.0k | if (h->end >= order_nr) |
227 | 38.5k | return h; |
228 | 58.0k | } |
229 | 66.5k | return NULL; |
230 | 105k | } |
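
A minimal sketch of the hole-list invariant, assuming a hypothetical range spanning orders 4..20 in which the value is dead between 8 and 14:

    /* hypothetical example, not part of the allocator */
    static void hole_example(LiveRange *v) {
        static struct Hole hole = { 8, 14, NULL };
        v->holes = &hole;
        /* live_range_has_hole(v, 10) == &hole: a CALL at order 10 may
         * leave this value in its register rather than spill it;
         * live_range_has_hole(v, 16) == NULL: the value is live at 16. */
    }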
231 | | |
232 | | |
233 | 10.5M | UnionFind * value_set_find(UnionFind *sets, MVMint32 key) { |
234 | 12.4M | while (sets[key].key != key) { |
235 | 1.90M | key = sets[key].key; |
236 | 1.90M | } |
237 | 10.5M | return sets + key; |
238 | 10.5M | } |
239 | | |
240 | | |
241 | 100k | MVMint32 value_set_union(UnionFind *sets, LiveRange *values, MVMint32 a, MVMint32 b) { |
242 | 100k | /* dereference the sets to their roots */ |
243 | 100k | a = value_set_find(sets, a)->key; |
244 | 100k | b = value_set_find(sets, b)->key; |
245 | 100k | if (a == b) { |
246 | 0 | /* secretly the same set anyway, could happen in some combinations of |
247 | 0 | * IF, COPY, and DO. */ |
248 | 0 | return a; |
249 | 0 | } |
250 | 100k | if (values[sets[b].idx].start < values[sets[a].idx].start) { |
251 | 0 | /* ensure we're picking the first one to start so that we maintain the |
252 | 0 | * first-definition heap order */ |
253 | 0 | MVMint32 t = a; a = b; b = t; |
254 | 0 | } |
255 | 100k | sets[b].key = a; /* point b to a */ |
256 | 100k | live_range_merge(values + sets[a].idx, values + sets[b].idx); |
257 | 100k | return a; |
258 | 100k | } |
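
A minimal usage sketch, assuming sets/values were filled in by determine_live_ranges below and that nodes 3 and 5 each head a live range (as when an IF merges its branch results):

    /* hypothetical example, not part of the allocator */
    static void union_example(UnionFind *sets, LiveRange *values) {
        MVMint32 root = value_set_union(sets, values, 3, 5);
        /* value_set_find(sets, 3) and value_set_find(sets, 5) now both
         * resolve to `root`, whose live range spans the merged start..end;
         * the losing range was deinitialized by live_range_merge. */
        (void)root;
    }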
259 | | |
260 | | |
261 | 8.11M | MVM_STATIC_INLINE void heap_swap(MVMint32 *heap, MVMint32 a, MVMint32 b) { |
262 | 8.11M | MVMint32 t = heap[a]; |
263 | 8.11M | heap[a] = heap[b]; |
264 | 8.11M | heap[b] = t; |
265 | 8.11M | } |
266 | | |
267 | | /* Functions to maintain a heap of references to the live ranges */ |
268 | | void live_range_heap_down(LiveRange *values, MVMint32 *heap, MVMint32 top, MVMint32 item, |
269 | 3.64M | MVMint32 (*cmp)(LiveRange *values, MVMint32 a, MVMint32 b)) { |
270 | 11.5M | while (item < top) { |
271 | 11.2M | MVMint32 left = item * 2 + 1; |
272 | 11.2M | MVMint32 right = left + 1; |
273 | 11.2M | MVMint32 swap; |
274 | 11.2M | if (right < top) { |
275 | 4.60M | swap = cmp(values, heap[left], heap[right]) < 0 ? left : right; |
276 | 3.43M | } else if (left < top) { |
277 | 645k | swap = left; |
278 | 2.79M | } else { |
279 | 2.79M | break; |
280 | 2.79M | } |
281 | 8.45M | if (cmp(values, heap[swap], heap[item]) < 0) { |
282 | 7.90M | heap_swap(heap, swap, item); |
283 | 7.90M | item = swap; |
284 | 552k | } else { |
285 | 552k | break; |
286 | 552k | } |
287 | 8.45M | } |
288 | 3.64M | } |
289 | | |
290 | | void live_range_heap_up(LiveRange *values, MVMint32 *heap, MVMint32 item, |
291 | 268k | MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { |
292 | 479k | while (item > 0) { |
293 | 407k | MVMint32 parent = (item-1)/2; |
294 | 407k | if (cmp(values, heap[parent], heap[item]) > 0) { |
295 | 211k | heap_swap(heap, item, parent); |
296 | 211k | item = parent; |
297 | 196k | } else { |
298 | 196k | break; |
299 | 196k | } |
300 | 407k | } |
301 | 268k | } |
302 | | |
303 | | MVMint32 live_range_heap_pop(LiveRange *values, MVMint32 *heap, size_t *top, |
304 | 3.64M | MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { |
305 | 3.64M | MVMint32 v = heap[0]; |
306 | 3.64M | MVMint32 t = --(*top); |
307 | 3.64M | /* pop by swap and heap-down */ |
308 | 3.64M | heap[0] = heap[t]; |
309 | 3.64M | live_range_heap_down(values, heap, t, 0, cmp); |
310 | 3.64M | return v; |
311 | 3.64M | } |
312 | | |
313 | | void live_range_heap_push(LiveRange *values, MVMint32 *heap, size_t *top, MVMint32 v, |
314 | 268k | MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { |
315 | 268k | /* NB, caller should use MVM_VECTOR_ENSURE_SPACE prior to calling */
316 | 268k | MVMint32 t = (*top)++; |
317 | 268k | heap[t] = v; |
318 | 268k | live_range_heap_up(values, heap, t, cmp); |
319 | 268k | } |
320 | | |
321 | 9.54M | MVMint32 live_range_heap_peek(LiveRange *values, MVMint32 *heap) { |
322 | 9.54M | return values[heap[0]].start; |
323 | 9.54M | } |
324 | | |
325 | | void live_range_heapify(LiveRange *values, MVMint32 *heap, MVMint32 top, |
326 | 0 | MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) { |
327 | 0 | MVMint32 i = top, mid = top/2; |
328 | 0 | while (i-- > mid) { |
329 | 0 | live_range_heap_up(values, heap, i, cmp); |
330 | 0 | } |
331 | 0 | } |
332 | | |
333 | | |
334 | 16.6M | MVMint32 values_cmp_first_ref(LiveRange *values, MVMint32 a, MVMint32 b) { |
335 | 16.6M | return values[a].start - values[b].start; |
336 | 16.6M | } |
337 | | |
338 | 24.6k | MVMint32 values_cmp_last_ref(LiveRange *values, MVMint32 a, MVMint32 b) { |
339 | 24.6k | return values[a].end - values[b].end; |
340 | 24.6k | } |
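
A minimal sketch of how these pieces drive the worklist in linear_scan below: a min-heap keyed on the first reference, so heap[0] is always the next live range to begin.

    /* hypothetical helper; `top` counts the live entries in `heap` */
    static MVMint32 pop_earliest(LiveRange *values, MVMint32 *heap, size_t *top) {
        /* live_range_heap_peek(values, heap) would return the smallest
         * `start`; popping removes that range and restores the heap */
        return live_range_heap_pop(values, heap, top, values_cmp_first_ref);
    }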
341 | | |
342 | | /* register assignment logic */ |
343 | 4.30M | #define NEXT_IN_RING(a,x) (((x)+1) == MVM_ARRAY_SIZE(a) ? 0 : ((x)+1)) |
344 | 2.15M | MVMint8 get_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls) { |
345 | 2.15M | /* ignore storage class for now */ |
346 | 2.15M | MVMint8 reg_num; |
347 | 2.15M | reg_num = alc->reg_ring[alc->reg_take]; |
348 | 2.15M | if (reg_num >= 0) { |
349 | 2.15M | /* not empty */ |
350 | 2.15M | alc->reg_ring[alc->reg_take] = -1; /* mark used */ |
351 | 2.15M | alc->reg_take = NEXT_IN_RING(alc->reg_ring, alc->reg_take); |
352 | 2.15M | } |
353 | 2.15M | return reg_num; |
354 | 2.15M | } |
355 | | |
356 | 2.15M | void free_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls, MVMint8 reg_num) { |
357 | 2.15M | if (alc->reg_ring[alc->reg_give] != -1) { |
358 | 0 | MVM_oops(tc, "No space to release register %d to ring", reg_num); |
359 | 0 | } |
360 | 2.15M | alc->reg_ring[alc->reg_give] = reg_num; |
361 | 2.15M | alc->reg_give = NEXT_IN_RING(alc->reg_ring, alc->reg_give); |
362 | 2.15M | } |
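
A minimal sketch of the ring discipline above: -1 marks an empty slot, registers leave at reg_take and return at reg_give, so every get_register must be balanced by a free_register.

    /* hypothetical example, not part of the allocator */
    static void ring_example(MVMThreadContext *tc, RegisterAllocator *alc) {
        MVMint8 r = get_register(tc, alc, MVM_JIT_STORAGE_GPR);
        if (r < 0) {
            /* ring exhausted; process_live_range spills to make room */
        } else {
            /* ... value lives in register r ... */
            free_register(tc, alc, MVM_JIT_STORAGE_GPR, r);
        }
    }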
363 | | |
364 | | void assign_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, |
365 | 3.86M | MVMint32 lv, MVMJitStorageClass reg_cls, MVMint8 reg_num) { |
366 | 3.86M | /* What to do here: |
367 | 3.86M | * - update tiles using this live range to refer to this register |
368 | 3.86M | * - update allocator to mark this register as used by this live range */ |
369 | 3.86M | LiveRange *range = alc->values + lv; |
370 | 3.86M | ValueRef *ref; |
371 | 3.86M | MVMint32 i; |
372 | 3.86M | |
373 | 3.86M | range->reg_cls = reg_cls; |
374 | 3.86M | range->reg_num = reg_num; |
375 | 12.4M | for (ref = range->first; ref != NULL; ref = ref->next) { |
376 | 8.57M | if (is_arglist_ref(list, ref)) { |
377 | 1.13M | /* don't assign registers to ARGLIST references, that will never |
378 | 1.13M | * work */ |
379 | 1.13M | continue; |
380 | 7.43M | } else { |
381 | 7.43M | MVMJitTile *tile = list->items[ref->tile_idx]; |
382 | 7.43M | tile->values[ref->value_idx] = reg_num; |
383 | 7.43M | } |
384 | 8.57M | } |
385 | 3.86M | |
386 | 11.5M | for (i = 0; i < 2; i++) { |
387 | 7.72M | MVMJitTile *tile = range->synthetic[i]; |
388 | 7.72M | if (tile != NULL) { |
389 | 484k | tile->values[i] = reg_num; |
390 | 484k | } |
391 | 7.72M | } |
392 | 3.86M | } |
393 | | |
394 | | |
395 | 1.80M | MVM_STATIC_INLINE void close_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) { |
396 | 1.80M | LiveRange *v = alc->values + ref; |
397 | 1.80M | if (v->holes && v->holes->start < order_nr(tile_idx)) { |
398 | 94.1k | v->holes->start = order_nr(tile_idx); |
399 | 94.1k | _DEBUG("Closed hole in live range %d at %d", ref, order_nr(tile_idx)); |
400 | 94.1k | } |
401 | 1.80M | } |
402 | | |
403 | 1.55M | MVM_STATIC_INLINE void open_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) { |
404 | 1.55M | LiveRange *v = alc->values + ref; |
405 | 1.55M | if (v->start < order_nr(tile_idx) && |
406 | 100k | (v->holes == NULL || v->holes->start > order_nr(tile_idx))) { |
407 | 100k | struct Hole *hole = alc->holes + alc->holes_top++; |
408 | 100k | hole->next = v->holes; |
409 | 100k | hole->start = 0; |
410 | 100k | hole->end = order_nr(tile_idx); |
411 | 100k | v->holes = hole; |
412 | 100k | _DEBUG("Opened hole in live range %d at %d", ref, order_nr(tile_idx)); |
413 | 100k | } |
414 | 1.55M | } |
415 | | |
416 | | /* Find holes in live ranges, as per Wimmer (2010). This is required only |
417 | | * because the spill strategy around CALLs is (sometimes) to load-and-restore,
418 | | * rather than to do a full spill, in the not-so-rare case that many of the live
419 | | * values will be temporaries and the call is only executed in a branch */
420 | | |
421 | 58.8k | static void find_holes(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { |
422 | 58.8k | MVMint32 i, j, k; |
423 | 58.8k | |
424 | 58.8k | MVMint32 bitmap_size = (alc->values_num >> 5) + 1; |
425 | 58.8k | |
426 | 58.8k | /* convenience macros */ |
427 | 1.39M | #define _BITMAP(_a) (bitmaps + (_a)*bitmap_size) |
428 | 58.8k | #define _SUCC(_a, _z) (list->blocks[(_a)].succ[(_z)]) |
429 | 58.8k | |
430 | 58.8k | MVMBitmap *bitmaps = MVM_calloc(list->blocks_num + 1, sizeof(MVMBitmap) * bitmap_size); |
431 | 58.8k | /* last bitmap is allocated to hold the diff, which is how we know which live
432 | 58.8k | * ranges' holes potentially need to be closed */
433 | 58.8k | MVMBitmap *diff = _BITMAP(list->blocks_num); |
434 | 58.8k | |
435 | 634k | for (j = list->blocks_num - 1; j >= 0; j--) { |
436 | 575k | MVMBitmap *live_in = _BITMAP(j); |
437 | 575k | MVMint32 start = list->blocks[j].start, end = list->blocks[j].end; |
438 | 575k | if (list->blocks[j].num_succ == 2) { |
439 | 238k | /* live out is union of successors' live_in */ |
440 | 238k | MVMBitmap *a = _BITMAP(_SUCC(j, 0)), *b = _BITMAP(_SUCC(j, 1)); |
441 | 238k | |
442 | 238k | MVM_bitmap_union(live_in, a, b, bitmap_size); |
443 | 238k | MVM_bitmap_difference(diff, a, b, bitmap_size); |
444 | 238k | |
445 | 670k | for (k = 0; k < bitmap_size; k++) { |
446 | 432k | MVMBitmap additions = diff[k]; |
447 | 763k | while (additions) { |
448 | 331k | MVMint32 bit = MVM_FFS(additions) - 1; |
449 | 331k | MVMint32 val = (k << 6) + bit; |
450 | 331k | close_hole(alc, val, end); |
451 | 331k | MVM_bitmap_delete(&additions, bit); |
452 | 331k | } |
453 | 432k | } |
454 | 337k | } else if (list->blocks[j].num_succ == 1) { |
455 | 278k | memcpy(live_in, _BITMAP(_SUCC(j, 0)), |
456 | 278k | sizeof(MVMBitmap) * bitmap_size); |
457 | 278k | } |
458 | 575k | |
459 | 4.05M | for (i = end - 1; i >= start; i--) { |
460 | 3.48M | MVMJitTile *tile = list->items[i]; |
461 | 3.48M | if (tile->op == MVM_JIT_ARGLIST) { |
462 | 135k | /* list of uses, all very real */ |
463 | 135k | MVMint32 nchild = list->tree->nodes[tile->node + 1]; |
464 | 585k | for (k = 0; k < nchild; k++) { |
465 | 449k | MVMint32 carg = list->tree->nodes[tile->node + 2 + k]; |
466 | 449k | MVMint32 ref = value_set_find(alc->sets, list->tree->nodes[carg + 1])->idx; |
467 | 449k | if (!MVM_bitmap_get(live_in, ref)) { |
468 | 427k | MVM_bitmap_set(live_in, ref); |
469 | 427k | close_hole(alc, ref, i); |
470 | 427k | } |
471 | 449k | } |
472 | 3.34M | } else if (tile->op == MVM_JIT_IF || tile->op == MVM_JIT_DO || tile->op == MVM_JIT_COPY) { |
473 | 295k | /* not a real use, no work needed here (we already merged them) */ |
474 | 3.05M | } else { |
475 | 3.05M | /* If a value is used and defined by the same tile, then the |
476 | 3.05M | * 'hole' only covers that single tile. The definitions must |
477 | 3.05M | * therefore be handled before the uses */ |
478 | 3.05M | |
479 | 3.05M | if (MVM_JIT_TILE_YIELDS_VALUE(tile)) { |
480 | 1.55M | MVMint32 ref = value_set_find(alc->sets, tile->node)->idx; |
481 | 1.55M | open_hole(alc, ref, i); |
482 | 1.55M | MVM_bitmap_delete(live_in, ref); |
483 | 1.55M | } |
484 | 5.18M | for (k = 0; k < tile->num_refs; k++) { |
485 | 2.12M | if (MVM_JIT_REGISTER_IS_USED(MVM_JIT_REGISTER_FETCH(tile->register_spec, k+1))) { |
486 | 1.74M | MVMint32 ref = value_set_find(alc->sets, tile->refs[k])->idx; |
487 | 1.74M | if (!MVM_bitmap_get(live_in, ref)) { |
488 | 1.04M | MVM_bitmap_set(live_in, ref); |
489 | 1.04M | close_hole(alc, ref, i); |
490 | 1.04M | } |
491 | 1.74M | } |
492 | 2.12M | } |
493 | 3.05M | } |
494 | 3.48M | } |
495 | 575k | } |
496 | 58.8k | MVM_free(bitmaps); |
497 | 58.8k | |
498 | 58.8k | #undef _BITMAP |
499 | 58.8k | #undef _SUCC |
500 | 58.8k | } |
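
A sketch of the payoff, assuming an expression tree of roughly this shape:

    /* (let (($val (...)))          ; $val defined before the branch
     *   (if (cond)
     *       (call ...)             ; $val dead on this path
     *       (use $val)))           ; $val live again on this path
     *
     * find_holes records a hole in $val's live range covering the CALL
     * path, so prepare_arglist_and_call sees live_range_has_hole(v, pos)
     * != NULL and leaves $val in its register instead of spilling it. */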
501 | | |
502 | | |
503 | 253k | static void determine_live_ranges(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { |
504 | 253k | MVMJitExprTree *tree = list->tree; |
505 | 253k | MVMint32 i, j, n; |
506 | 253k | MVMint32 num_phi = 0; /* pessimistic but correct upper bound of number of holes */ |
507 | 253k | |
508 | 253k | alc->sets = MVM_calloc(tree->nodes_num, sizeof(UnionFind)); |
509 | 253k | /* up to 4 refs per tile (1 out, 3 in) plus the total number of ARGLIST refs */
510 | 253k | alc->refs = MVM_calloc(list->items_num * 4 + list->num_arglist_refs, sizeof(ValueRef)); |
511 | 253k | alc->refs_num = 0; |
512 | 253k | |
513 | 253k | MVM_VECTOR_INIT(alc->values, list->items_num); |
514 | 253k | MVM_VECTOR_INIT(alc->worklist, list->items_num); |
515 | 253k | |
516 | 7.05M | for (i = 0; i < list->items_num; i++) { |
517 | 6.80M | MVMJitTile *tile = list->items[i]; |
518 | 6.80M | MVMint32 node = tile->node; |
519 | 6.80M | /* Each of the following counts as either an alias or as a PHI (in case |
520 | 6.80M | * of IF), and thus these are not actual definitions */ |
521 | 6.80M | if (tile->op == MVM_JIT_COPY) { |
522 | 189k | MVMint32 ref = tree->nodes[tile->node + 1]; |
523 | 189k | _DEBUG("Unify COPY node (%d -> %d)", tile->node, ref); |
524 | 189k | alc->sets[node].key = ref; /* point directly to actual definition */ |
525 | 6.61M | } else if (tile->op == MVM_JIT_DO) { |
526 | 111k | MVMint32 nchild = tree->nodes[tile->node + 1]; |
527 | 111k | MVMint32 ref = tree->nodes[tile->node + 1 + nchild]; |
528 | 111k | _DEBUG("Unify COPY DO (%d -> %d)", tile->node, ref); |
529 | 111k | alc->sets[node].key = ref; |
530 | 6.50M | } else if (tile->op == MVM_JIT_IF) { |
531 | 100k | MVMint32 left_cond = tree->nodes[tile->node + 2]; |
532 | 100k | MVMint32 right_cond = tree->nodes[tile->node + 3]; |
533 | 100k | /* NB; this may cause a conflict in register requirements, in which |
534 | 100k | * case we should resolve it by creating a new live range or inserting |
535 | 100k | * a copy */ |
536 | 100k | alc->sets[node].key = value_set_union(alc->sets, alc->values, left_cond, right_cond); |
537 | 100k | _DEBUG("Merging nodes %d and %d to %d (result key = %d)", left_cond, right_cond, node, alc->sets[node].key); |
538 | 100k | num_phi++; |
539 | 6.40M | } else if (tile->op == MVM_JIT_ARGLIST) { |
540 | 303k | MVMint32 num_args = tree->nodes[tile->node + 1]; |
541 | 303k | MVMJitExprNode *refs = tree->nodes + tile->node + 2; |
542 | 303k | _DEBUG("Adding %d references to ARGLIST node", num_args); |
543 | 1.41M | for (j = 0; j < num_args; j++) { |
544 | 1.11M | MVMint32 carg = refs[j]; |
545 | 1.11M | MVMint32 value = list->tree->nodes[carg+1]; |
546 | 1.11M | MVMint32 idx = value_set_find(alc->sets, value)->idx; |
547 | 1.11M | _DEBUG(" Reference %d", idx); |
548 | 1.11M | live_range_add_ref(alc, alc->values + idx, i, j + 1); |
549 | 1.11M | /* include the CALL node into the arglist child range, so we |
550 | 1.11M | * don't release them too early */ |
551 | 1.11M | alc->values[idx].end = MAX(alc->values[idx].end, order_nr(i + 1)); |
552 | 1.11M | } |
553 | 6.09M | } else { |
554 | 6.09M | /* create a live range if necessary */ |
555 | 6.09M | if (MVM_JIT_TILE_YIELDS_VALUE(tile)) { |
556 | 3.47M | MVMint8 register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, 0); |
557 | 3.47M | MVMint32 idx = live_range_init(alc); |
558 | 3.47M | alc->sets[node].key = node; |
559 | 3.47M | alc->sets[node].idx = idx; |
560 | 3.47M | _DEBUG("Create live range %d (tile=%d, node=%d)", idx,i, node); |
561 | 3.47M | live_range_add_ref(alc, alc->values + idx, i, 0); |
562 | 3.47M | if (MVM_JIT_REGISTER_HAS_REQUIREMENT(register_spec)) { |
563 | 1.42M | alc->values[idx].register_spec = register_spec; |
564 | 1.42M | } |
565 | 3.47M | MVM_VECTOR_PUSH(alc->worklist, idx); |
566 | 3.47M | } |
567 | 6.09M | /* account for uses */ |
568 | 10.4M | for (j = 0; j < tile->num_refs; j++) { |
569 | 4.33M | MVMint8 register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, j+1); |
570 | 4.33M | /* any 'use' register requirements are handled in the allocation step */ |
571 | 4.33M | if (MVM_JIT_REGISTER_IS_USED(register_spec)) { |
572 | 3.50M | MVMint32 idx = value_set_find(alc->sets, tile->refs[j])->idx; |
573 | 3.50M | _DEBUG("Adding reference to live range %d from tile %d", idx, i); |
574 | 3.50M | live_range_add_ref(alc, alc->values + idx, i, j + 1); |
575 | 3.50M | } |
576 | 4.33M | } |
577 | 6.09M | } |
578 | 6.80M | if (MVM_JIT_TILE_YIELDS_VALUE(tile) && tree->info[node].opr_type != 0) { |
579 | 888k | LiveRange *range = alc->values + value_set_find(alc->sets, node)->idx; |
580 | 888k | _ASSERT(range->reg_type == 0 || (range->reg_type << 3) == tree->info[node].opr_type, |
581 | 888k | "Register types do not match between value and node"); |
582 | 888k | /* shift to match MVM_reg_types. should arguably be a macro maybe */ |
583 | 888k | range->reg_type = tree->info[node].opr_type >> 3; |
584 | 888k | _DEBUG("Assigned type: %d to live range %d", range->reg_type, (MVMint32)(range - alc->values));
585 | 888k | } |
586 | 6.80M | } |
587 | 253k | if (num_phi > 0) { |
588 | 58.8k | /* If there are PHI nodes, there will be holes. |
589 | 58.8k | * The array allocated here will be used to construct linked lists */ |
590 | 58.8k | alc->holes = MVM_malloc(num_phi * sizeof(struct Hole)); |
591 | 58.8k | alc->holes_top = 0; |
592 | 58.8k | find_holes(tc, alc, list); |
593 | 58.8k | |
594 | 58.8k | /* eliminate empty values from the worklist */ |
595 | 1.61M | for (i = 0, j = 0; j < alc->worklist_num; j++) { |
596 | 1.55M | if (!live_range_is_empty(alc->values + alc->worklist[j])) { |
597 | 1.45M | alc->worklist[i++] = alc->worklist[j]; |
598 | 1.45M | } |
599 | 1.55M | } |
600 | 58.8k | alc->worklist_num = i; |
601 | 58.8k | |
602 | 194k | } else { |
603 | 194k | alc->holes = NULL; |
604 | 194k | alc->holes_top = 0; |
605 | 194k | } |
606 | 253k | } |
607 | | |
608 | | /* The code below needs some thinking... */ |
609 | 2.15M | static void active_set_add(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 a) { |
610 | 2.15M | /* the original linear-scan heuristic for spilling is to take the last value |
611 | 2.15M | * in the set to expire, freeing up the largest extent of code... that is a
612 | 2.15M | * reasonably good heuristic, albeit not essential to the concept of linear |
613 | 2.15M | * scan. It makes sense to keep the stack ordered at all times (simplest by |
614 | 2.15M | * use of insertion sort). Although insertion sort is O(n^2), n is never |
615 | 2.15M | * large in this case (32 for RISC architectures, maybe, if we ever support |
616 | 2.15M | * them; 7 for x86-64). So the time spent on insertion sort is always small
617 | 2.15M | * and bounded by a constant, hence O(1). Yes, algorithmics works this way |
618 | 2.15M | * :-) */ |
619 | 2.15M | MVMint32 i; |
620 | 3.65M | for (i = 0; i < alc->active_top; i++) { |
621 | 2.03M | MVMint32 b = alc->active[i]; |
622 | 2.03M | if (alc->values[b].end > alc->values[a].end) { |
623 | 534k | /* insert a before b */ |
624 | 534k | memmove(alc->active + i + 1, alc->active + i, sizeof(MVMint32)*(alc->active_top - i)); |
625 | 534k | alc->active[i] = a; |
626 | 534k | alc->active_top++; |
627 | 534k | return; |
628 | 534k | } |
629 | 2.03M | } |
630 | 2.15M | /* append at the end */ |
631 | 1.61M | alc->active[alc->active_top++] = a; |
632 | 1.61M | } |
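
A worked example of the ordering invariant, assuming two active ranges:

    /* active = [ a(end=4), b(end=10) ]; adding c(end=8) memmoves b one
     * slot right and inserts c at index 1, giving [ a(4), c(8), b(10) ],
     * so active_set_expire below can stop at the first surviving range. */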
633 | | |
634 | | |
635 | | |
636 | | /* Take live ranges from the active set whose last use was at or before the given position, and append them to the retired list */
637 | 3.83M | static void active_set_expire(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) { |
638 | 3.83M | MVMint32 i; |
639 | 5.91M | for (i = 0; i < alc->active_top; i++) { |
640 | 3.89M | MVMint32 v = alc->active[i]; |
641 | 3.89M | MVMint8 reg_num = alc->values[v].reg_num; |
642 | 3.89M | if (alc->values[v].end > order_nr) { |
643 | 1.81M | break; |
644 | 2.08M | } else { |
645 | 2.08M | _DEBUG("Live range %d is out of scope (last ref %d, %d) and releasing register %d", |
646 | 2.08M | v, alc->values[v].end, order_nr, reg_num); |
647 | 2.08M | free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_num); |
648 | 2.08M | } |
649 | 3.89M | } |
650 | 3.83M | /* shift off the first x values from the live set. */ |
651 | 3.83M | if (i > 0) { |
652 | 1.39M | MVM_VECTOR_APPEND(alc->retired, alc->active, i); |
653 | 1.39M | alc->active_top -= i; |
654 | 1.39M | if (alc->active_top > 0) { |
655 | 410k | memmove(alc->active, alc->active + i, |
656 | 410k | sizeof(alc->active[0]) * alc->active_top); |
657 | 410k | } |
658 | 1.39M | } |
659 | 3.83M | } |
660 | | |
661 | | /* Compute the earliest live range that is still active. */ |
662 | 3.57M | static MVMint32 earliest_active_value(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 tile_order_nr) { |
663 | 3.57M | /* can we cache this, and does it make sense to do so? */ |
664 | 3.57M | int i; |
665 | 6.87M | for (i = 0; i < alc->active_top; i++) { |
666 | 3.29M | tile_order_nr = MIN(tile_order_nr, alc->values[alc->active[i]].start); |
667 | 3.29M | } |
668 | 3.57M | return tile_order_nr; |
669 | 3.57M | } |
670 | | |
671 | 90 | static void active_set_splice(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 to_splice) { |
672 | 90 | MVMint32 i;
673 | 90 | /* find (reverse, because it's usually the last); predecrement alc->active_top because we're removing one item */ |
674 | 90 | for (i = --alc->active_top; i >= 0; i--) { |
675 | 90 | if (alc->active[i] == to_splice) |
676 | 90 | break; |
677 | 90 | } |
678 | 90 | if (i >= 0 && i < alc->active_top) { |
679 | 0 | /* shift out */ |
680 | 0 | memmove(alc->active + i, alc->active + i + 1, |
681 | 0 | sizeof(alc->active[0]) * (alc->active_top - i));
682 | 0 | } |
683 | 90 | } |
684 | | |
685 | 3.83M | static void spill_set_release(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) { |
686 | 3.89M | while (alc->spilled_num > 0 && alc->values[alc->spilled[0]].end <= order_nr) { |
687 | 66.6k | MVMint32 spilled = live_range_heap_pop(alc->values, alc->spilled, &alc->spilled_num, |
688 | 66.6k | values_cmp_last_ref); |
689 | 66.6k | LiveRange *value = alc->values + spilled; |
690 | 66.6k | _DEBUG("VM Register %d for live range %d can be released", |
691 | 66.6k | value->spill_pos / sizeof(MVMRegister), spilled); |
692 | 66.6k | MVM_jit_spill_memory_release(tc, alc->compiler, value->spill_pos, value->reg_type); |
693 | 66.6k | } |
694 | 3.83M | } |
695 | | |
696 | | static MVMint32 insert_load_before_use(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, |
697 | 397k | ValueRef *ref, MVMint32 load_pos) { |
698 | 397k | MVMint32 n = live_range_init(alc); |
699 | 397k | MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_load, 2, 1, |
700 | 397k | MVM_JIT_STORAGE_LOCAL, load_pos, 0); |
701 | 397k | LiveRange *range = alc->values + n; |
702 | 397k | MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx - 1, +1); /* insert just prior to use */ |
703 | 397k | range->synthetic[0] = tile; |
704 | 397k | range->first = range->last = ref; |
705 | 397k | |
706 | 397k | range->start = order_nr(ref->tile_idx) - 1; |
707 | 397k | range->end = order_nr(ref->tile_idx); |
708 | 397k | return n; |
709 | 397k | } |
710 | | |
711 | | static MVMint32 insert_store_after_definition(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, |
712 | 86.0k | ValueRef *ref, MVMint32 store_pos) { |
713 | 86.0k | MVMint32 n = live_range_init(alc); |
714 | 86.0k | MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_store, 2, 2, |
715 | 86.0k | MVM_JIT_STORAGE_LOCAL, store_pos, 0, 0); |
716 | 86.0k | LiveRange *range = alc->values + n; |
717 | 86.0k | MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx, -1); /* insert just after storage */ |
718 | 86.0k | range->synthetic[1] = tile; |
719 | 86.0k | range->first = range->last = ref; |
720 | 86.0k | |
721 | 86.0k | range->start = order_nr(ref->tile_idx); |
722 | 86.0k | range->end = order_nr(ref->tile_idx) + 1; |
723 | 86.0k | return n; |
724 | 86.0k | } |
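
A worked example of how live_range_spill (below) uses these two helpers, assuming a value defined at tile 10, used at tiles 14 and 18, and spilled to position `pos`:

    /* original range: one register from order 20 (def) to order 36 (last use)
     * after spilling:
     *   def@10  + synthetic STORE to [pos]   covers orders 20..21
     *   synthetic LOAD from [pos] + use@14   covers orders 27..28
     *   synthetic LOAD from [pos] + use@18   covers orders 35..36
     * each fragment is a tiny range around one ref, so each can take
     * whatever register happens to be free at that point. */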
725 | | |
726 | 90 | static MVMint32 select_live_range_for_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 code_pos) { |
727 | 90 | return alc->active[alc->active_top-1]; |
728 | 90 | } |
729 | | |
730 | | |
731 | | static void live_range_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, |
732 | 66.6k | MVMint32 to_spill, MVMint32 spill_pos, MVMint32 code_pos) { |
733 | 66.6k | |
734 | 66.6k | MVMint8 reg_spilled = alc->values[to_spill].reg_num; |
735 | 66.6k | /* loop over all value refs */ |
736 | 66.6k | _DEBUG("Spilling live range value %d to memory position %d at %d", to_spill, spill_pos, code_pos); |
737 | 66.6k | |
738 | 623k | while (alc->values[to_spill].first != NULL) { |
739 | 557k | /* make a new live range */ |
740 | 557k | MVMint32 n; |
741 | 557k | |
742 | 557k | /* shift current ref */ |
743 | 557k | ValueRef *ref = alc->values[to_spill].first; |
744 | 557k | alc->values[to_spill].first = ref->next; |
745 | 557k | ref->next = NULL; |
746 | 557k | |
747 | 557k | if (is_arglist_ref(list, ref) && order_nr(ref->tile_idx) > code_pos) { |
748 | 73.0k | /* Never insert a load before a future ARGLIST; ARGLIST may easily |
749 | 73.0k | * consume more registers than we have available. Past ARGLISTs have |
750 | 73.0k | * already been handled, so we do need to insert a load before
751 | 73.0k | * them (or modify in place, but, complex!). */ |
752 | 73.0k | continue; |
753 | 484k | } else if (is_definition(ref)) { |
754 | 86.0k | n = insert_store_after_definition(tc, alc, list, ref, spill_pos); |
755 | 397k | } else { |
756 | 397k | n = insert_load_before_use(tc, alc, list, ref, spill_pos); |
757 | 397k | } |
758 | 557k | |
759 | 484k | if (order_nr(ref->tile_idx) < code_pos) { |
760 | 282k | /* in the past, which means we can safely use the spilled register |
761 | 282k | * and immediately retire this live range */ |
762 | 282k | assign_register(tc, alc, list, n, MVM_JIT_STORAGE_GPR, reg_spilled); |
763 | 282k | MVM_VECTOR_PUSH(alc->retired, n); |
764 | 201k | } else { |
765 | 201k | /* in the future, which means we need to add it to the worklist */ |
766 | 201k | MVM_VECTOR_ENSURE_SPACE(alc->worklist, 1); |
767 | 201k | live_range_heap_push(alc->values, alc->worklist, &alc->worklist_num, n, |
768 | 201k | values_cmp_first_ref); |
769 | 201k | } |
770 | 484k | } |
771 | 66.6k | |
772 | 66.6k | /* clear value references */ |
773 | 66.6k | alc->values[to_spill].last = NULL; |
774 | 66.6k | |
775 | 66.6k | /* mark as spilled and store the spill position */ |
776 | 66.6k | alc->values[to_spill].spill_pos = spill_pos; |
777 | 66.6k | alc->values[to_spill].spill_idx = code_pos; |
778 | 66.6k | free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_spilled); |
779 | 66.6k | MVM_VECTOR_ENSURE_SPACE(alc->spilled, 1); |
780 | 66.6k | live_range_heap_push(alc->values, alc->spilled, &alc->spilled_num, |
781 | 66.6k | to_spill, values_cmp_last_ref); |
782 | 66.6k | } |
783 | | |
784 | | |
785 | | static void prepare_arglist_and_call(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, |
786 | 303k | MVMint32 arglist_idx, MVMint32 call_idx) { |
787 | 303k | MVMJitTile *arglist_tile = list->items[arglist_idx], |
788 | 303k | *call_tile = list->items[call_idx]; |
789 | 303k | MVMJitExprTree *tree = list->tree; |
790 | 303k | MVMint32 num_args = tree->nodes[arglist_tile->node + 1]; |
791 | 303k | MVMint32 arg_values[16]; |
792 | 303k | MVMJitStorageRef storage_refs[16]; |
793 | 303k | |
794 | 303k | struct { |
795 | 303k | MVMint8 in_reg; /* register number that is to be moved in */ |
796 | 303k | MVMint8 num_out; /* how many values need to make room for me */ |
797 | 303k | } topological_map[MVM_JIT_ARCH_NUM_GPR]; /* reverse map for topological sort of moves */ |
798 | 303k | |
799 | 303k | struct { |
800 | 303k | MVMint8 reg_num; |
801 | 303k | MVMint8 stack_pos; |
802 | 303k | } stack_transfer[16]; |
803 | 303k | MVMint32 stack_transfer_top = 0; |
804 | 303k | |
805 | 303k | MVMint8 transfer_queue[16]; |
806 | 303k | MVMint32 transfer_queue_idx, transfer_queue_top = 0, transfers_required = 0; |
807 | 303k | |
808 | 303k | MVMint8 spilled_args[16]; |
809 | 303k | MVMint32 spilled_args_top = 0; |
810 | 303k | |
811 | 303k | MVMBitmap call_bitmap = 0, arg_bitmap = 0; |
812 | 303k | |
813 | 303k | MVMint8 spare_register; |
814 | 303k | |
815 | 303k | MVMint32 i, j, ins_pos = 2; |
816 | 303k | |
817 | 303k | /* get storage positions for arglist */ |
818 | 303k | MVM_jit_arch_storage_for_arglist(tc, alc->compiler, tree, arglist_tile->node, storage_refs); |
819 | 303k | |
820 | 303k | /* get value refs for arglist */ |
821 | 1.41M | for (i = 0; i < num_args; i++) { |
822 | 1.11M | MVMint32 carg = tree->nodes[arglist_tile->node + 2 + i]; |
823 | 1.11M | /* may refer to spilled live range */ |
824 | 1.11M | arg_values[i] = value_set_find(alc->sets, tree->nodes[carg + 1])->idx; |
825 | 1.11M | } |
826 | 303k | |
827 | 303k | _DEBUG("prepare_call: Got %d args", num_args); |
828 | 303k | |
829 | 303k | /* initialize topological map, use -1 as 'undefined' inbound value */
830 | 5.16M | for (i = 0; i < MVM_ARRAY_SIZE(topological_map); i++) { |
831 | 4.86M | topological_map[i].num_out = 0; |
832 | 4.86M | topological_map[i].in_reg = -1; |
833 | 4.86M | } |
834 | 303k | |
835 | 1.14M | for (i = 0, j = 0; i < alc->active_top; i++) { |
836 | 843k | LiveRange *v = alc->values + alc->active[i]; |
837 | 843k | MVMint32 code_pos = order_nr(call_idx); |
838 | 843k | if (v->end > code_pos && live_range_has_hole(v, code_pos) == NULL) { |
839 | 66.5k | /* surviving values need to be spilled */ |
840 | 66.5k | MVMint32 spill_pos = MVM_jit_spill_memory_select(tc, alc->compiler, v->reg_type); |
841 | 66.5k | /* spilling at the CALL idx will mean that the spiller inserts a |
842 | 66.5k | * LOAD at the current register before the ARGLIST, meaning it |
843 | 66.5k | * remains 'live' for this ARGLIST */ |
844 | 66.5k | _DEBUG("Spilling %d to %d at %d", alc->active[i], spill_pos, code_pos); |
845 | 66.5k | live_range_spill(tc, alc, list, alc->active[i], spill_pos, code_pos); |
846 | 776k | } else { |
847 | 776k | /* compact the active set */ |
848 | 776k | alc->active[j++] = alc->active[i]; |
849 | 776k | } |
850 | 843k | } |
851 | 303k | alc->active_top = j; |
852 | 303k | |
853 | 1.41M | for (i = 0; i < num_args; i++) { |
854 | 1.11M | LiveRange *v = alc->values + arg_values[i]; |
855 | 1.11M | if (v->spill_idx < order_nr(call_idx)) { |
856 | 72.9k | /* spilled prior to the ARGLIST/CALL */ |
857 | 72.9k | spilled_args[spilled_args_top++] = i; |
858 | 1.03M | } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) { |
859 | 1.00M | MVMint8 reg_num = storage_refs[i]._pos; |
860 | 1.00M | if (reg_num != v->reg_num) { |
861 | 922k | _DEBUG("Transfer Rq(%d) -> Rq(%d)", reg_num, v->reg_num); |
862 | 922k | topological_map[reg_num].in_reg = v->reg_num; |
863 | 922k | topological_map[v->reg_num].num_out++; |
864 | 922k | transfers_required++; |
865 | 86.2k | } else { |
866 | 86.2k | _DEBUG("Transfer Rq(%d) not required", reg_num); |
867 | 86.2k | } |
868 | 28.3k | } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_STACK) { |
869 | 28.3k | /* enqueue for stack transfer */ |
870 | 28.3k | stack_transfer[stack_transfer_top].reg_num = v->reg_num; |
871 | 28.3k | stack_transfer[stack_transfer_top].stack_pos = storage_refs[i]._pos; |
872 | 28.3k | stack_transfer_top++; |
873 | 28.3k | /* count the outbound edge */ |
874 | 28.3k | topological_map[v->reg_num].num_out++; |
875 | 0 | } else { |
876 | 0 | NYI(this_storage_class); |
877 | 0 | } |
878 | 1.11M | |
879 | 1.11M | /* set bitmap */ |
880 | 1.11M | if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) { |
881 | 1.08M | MVMint8 reg_num = storage_refs[i]._pos; |
882 | 1.08M | MVM_bitmap_set(&arg_bitmap, reg_num); |
883 | 1.08M | } |
884 | 1.11M | } |
885 | 303k | |
886 | 303k | _DEBUG("%d transfers required", transfers_required); |
887 | 303k | |
888 | 1.11M | #define INSERT_TILE(_tile, _pos, _order) MVM_jit_tile_list_insert(tc, list, _tile, _pos, _order) |
889 | 1.11M | #define INSERT_NEXT_TILE(_tile) INSERT_TILE(_tile, arglist_idx, ins_pos++) |
890 | 303k | #define MAKE_TILE(_code, _narg, _nval, ...) MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_ ## _code, _narg, _nval, __VA_ARGS__) |
891 | 303k | |
892 | 1.01M | #define INSERT_MOVE(_a, _b) INSERT_NEXT_TILE(MAKE_TILE(move, 0, 2, _a, _b)) |
893 | 28.3k | #define INSERT_COPY_TO_STACK(_s, _r) INSERT_NEXT_TILE(MAKE_TILE(store, 2, 2, MVM_JIT_STORAGE_STACK, _s, 0, _r)) |
894 | 72.9k | #define INSERT_LOAD_LOCAL(_r, _l) INSERT_NEXT_TILE(MAKE_TILE(load, 2, 1, MVM_JIT_STORAGE_LOCAL, _l, _r)) |
895 | 303k | #define INSERT_LOCAL_STACK_COPY(_s, _l) \ |
896 | 0 | INSERT_NEXT_TILE(MAKE_TILE(memory_copy, 4, 2, MVM_JIT_STORAGE_STACK, _s, MVM_JIT_STORAGE_LOCAL, _l, 0, spare_register)) |
897 | 303k | |
898 | 945k | #define ENQUEUE_TRANSFER(_r) do { \ |
899 | 945k | MVMint8 _c = (_r); \ |
900 | 945k | if (--(topological_map[(_c)].num_out) == 0 && \ |
901 | 945k | topological_map[(_c)].in_reg >= 0) { \ |
902 | 311k | transfer_queue[transfer_queue_top++] = _c; \ |
903 | 311k | } \ |
904 | 945k | } while(0) |
905 | 303k | |
906 | 303k | |
907 | 303k | /* resolve conflicts for CALL; since we're spilling any surviving values,
908 | 303k | * we can just move them to any free register. */
909 | 712k | for (i = 0; i < call_tile->num_refs; i++) { |
910 | 408k | MVMint8 spec = MVM_JIT_REGISTER_FETCH(call_tile->register_spec, i + 1); |
911 | 408k | if (MVM_JIT_REGISTER_IS_USED(spec)) { |
912 | 104k | MVMint8 reg = call_tile->values[i+1]; |
913 | 104k | MVM_bitmap_set(&call_bitmap, reg); |
914 | 104k | } |
915 | 408k | } |
916 | 303k | |
917 | 363k | while (call_bitmap & arg_bitmap) { |
918 | 59.5k | MVMuint32 free_reg = ~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP); |
919 | 59.5k | /* FFS counts registers starting from 1 */ |
920 | 59.5k | MVMuint8 src = MVM_FFS(call_bitmap & arg_bitmap) - 1; |
921 | 59.5k | MVMuint8 dst = MVM_FFS(free_reg) - 1; |
922 | 59.5k | |
923 | 59.5k | _ASSERT(free_reg != 0, "JIT: need to move a register but nothing is free"); |
924 | 59.5k | |
925 | 59.5k | /* add edge */ |
926 | 59.5k | topological_map[dst].in_reg = src; |
927 | 59.5k | topological_map[src].num_out++; |
928 | 59.5k | |
929 | 59.5k | /* update bitmap */ |
930 | 59.5k | MVM_bitmap_delete(&call_bitmap, src); |
931 | 59.5k | MVM_bitmap_set(&call_bitmap, dst); |
932 | 59.5k | |
933 | 59.5k | /* update CALL args */ |
934 | 178k | for (i = 0; i < call_tile->num_refs; i++) { |
935 | 119k | if (call_tile->values[i+1] == src) { |
936 | 59.5k | call_tile->values[i+1] = dst; |
937 | 59.5k | } |
938 | 119k | } |
939 | 59.5k | } |
940 | 303k | |
941 | 303k | |
942 | 303k | /* at this point, all outbound edges have been created, and none have been |
943 | 303k | * processed yet, so we can enqueue all 'free' transfers */
944 | 5.16M | for (i = 0; i < MVM_ARRAY_SIZE(topological_map); i++) { |
945 | 4.86M | if (topological_map[i].num_out == 0 && |
946 | 3.85M | topological_map[i].in_reg >= 0) { |
947 | 606k | _DEBUG("Directly transfer %d -> %d", topological_map[i].in_reg, i); |
948 | 606k | transfer_queue[transfer_queue_top++] = i; |
949 | 606k | } |
950 | 4.86M | } |
951 | 303k | |
952 | 303k | /* with call_bitmap and arg_bitmap given, we can determine the spare |
953 | 303k | * register used for allocation; NB this may only be necessary in some |
954 | 303k | * cases */ |
955 | 303k | spare_register = MVM_FFS(~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP)) - 1; |
956 | 303k | _ASSERT(spare_register >= 0, "JIT: No spare register for moves"); |
957 | 303k | |
958 | 332k | for (i = 0; i < stack_transfer_top; i++) { |
959 | 28.3k | MVMint8 reg_num = stack_transfer[i].reg_num; |
960 | 28.3k | MVMint8 stk_pos = stack_transfer[i].stack_pos; |
961 | 28.3k | INSERT_COPY_TO_STACK(stk_pos, reg_num); |
962 | 28.3k | _DEBUG("Insert stack parameter: Rq(%d) -> [rsp+%d]", reg_num, stk_pos); |
963 | 28.3k | ENQUEUE_TRANSFER(reg_num); |
964 | 28.3k | } |
965 | 303k | |
966 | 303k | |
967 | 1.22M | for (transfer_queue_idx = 0; transfer_queue_idx < transfer_queue_top; transfer_queue_idx++) { |
968 | 917k | MVMint8 dst = transfer_queue[transfer_queue_idx]; |
969 | 917k | MVMint8 src = topological_map[dst].in_reg; |
970 | 917k | _ASSERT(src >= 0, "No inboud edge (sort)"); |
971 | 917k | _DEBUG("Insert move (toposort): Rq(%d) -> Rq(%d)", src, dst); |
972 | 917k | INSERT_MOVE(dst, src); |
973 | 917k | ENQUEUE_TRANSFER(src); |
974 | 917k | } |
975 | 303k | |
976 | 303k | if (transfer_queue_top < transfers_required) { |
977 | 32.1k | /* suppose we have a cycle of transfers: a -> b -> c -> a; |
978 | 32.1k | * since we only keep the one inbound register as a reference, the chain |
979 | 32.1k | * is really: |
980 | 32.1k | * a -> c -> b -> a |
981 | 32.1k | * We can 'break' this cycle by walking the chain in this order, first |
982 | 32.1k | * moving 'a' out of the way into a spare register, then moving c to a,
983 | 32.1k | * b to c, and finally moving the spare register to 'b' |
984 | 32.1k | */ |
985 | 546k | for (i = 0; i < MVM_JIT_ARCH_NUM_GPR; i++) { |
986 | 514k | if (topological_map[i].num_out > 0) { |
987 | 32.1k | MVMint8 src, dst; |
988 | 32.1k | INSERT_MOVE(spare_register, i); |
989 | 32.1k | _ASSERT(--(topological_map[i].num_out) == 0, |
990 | 32.1k | "More than one outbound edge in cycle"); |
991 | 64.6k | for (dst = i, src = topological_map[i].in_reg; src != i; |
992 | 32.5k | dst = src, src = topological_map[src].in_reg) { |
993 | 32.5k | _ASSERT(src >= 0, "No inbound edge (cycle breaking)"); |
994 | 32.5k | INSERT_MOVE(dst, src); |
995 | 32.5k | _ASSERT(--(topological_map[src].num_out) == 0, |
996 | 32.5k | "More than one outbound edge in cycle"); |
997 | 32.5k | } |
998 | 32.1k | INSERT_MOVE(dst, spare_register); |
999 | 32.1k | } |
1000 | 514k | } |
1001 | 32.1k | } |
1002 | 303k | |
1003 | 303k | /* now all that remains is to deal with spilled values */ |
1004 | 376k | for (i = 0; i < spilled_args_top; i++) { |
1005 | 72.9k | MVMint32 arg = spilled_args[i]; |
1006 | 72.9k | LiveRange *v = alc->values + arg_values[arg]; |
1007 | 72.9k | if (storage_refs[arg]._cls == MVM_JIT_STORAGE_GPR) { |
1008 | 72.9k | _DEBUG("Loading spilled value to Rq(%d) from [rbx+%d]", storage_refs[arg]._pos, v->spill_pos); |
1009 | 72.9k | INSERT_LOAD_LOCAL(storage_refs[arg]._pos, v->spill_pos); |
1010 | 0 | } else if (storage_refs[arg]._cls == MVM_JIT_STORAGE_STACK) { |
1011 | 0 | INSERT_LOCAL_STACK_COPY(storage_refs[arg]._pos, v->spill_pos); |
1012 | 0 | } else { |
1013 | 0 | NYI(storage_class); |
1014 | 0 | } |
1015 | 72.9k | } |
1016 | 303k | } |
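
A worked example of the move resolution above, assuming three argument registers r0..r2 that must rotate r0->r1, r1->r2, r2->r0 (in_reg[1] = 0, in_reg[2] = 1, in_reg[0] = 2, every num_out == 1):

    /* no register has num_out == 0, so the topological queue starts empty
     * and transfer_queue_top < transfers_required triggers the cycle walk
     * at i == 0, which emits:
     *
     *   move r0 -> spare    (break the cycle by saving r0)
     *   move r2 -> r0       (dst = 0, src = in_reg[0] = 2)
     *   move r1 -> r2       (dst = 2, src = in_reg[2] = 1)
     *   move spare -> r1    (src == i terminates the walk)
     *
     * four moves, one spare register, and no argument value is lost. */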
1017 | | |
1018 | 6.80M | MVM_STATIC_INLINE void process_tile(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 tile_cursor) { |
1019 | 6.80M | MVMJitTile *tile = list->items[tile_cursor]; |
1020 | 6.80M | |
1021 | 6.80M | if (tile->op == MVM_JIT_ARGLIST) { |
1022 | 303k | MVMint32 arglist_idx = tile_cursor; |
1023 | 303k | MVMint32 call_idx = tile_cursor + 1; |
1024 | 303k | _ASSERT(call_idx < list->items_num && |
1025 | 303k | (list->items[call_idx]->op == MVM_JIT_CALL || |
1026 | 303k | list->items[call_idx]->op == MVM_JIT_CALLV), |
1027 | 303k | "ARGLIST tiles must be followed by CALL"); |
1028 | 6.49M | } else if (tile->op == MVM_JIT_CALL || tile->op == MVM_JIT_CALLV) { |
1029 | 303k | MVMint32 arglist_idx = tile_cursor - 1; |
1030 | 303k | MVMint32 call_idx = tile_cursor; |
1031 | 303k | _ASSERT(tile_cursor > 0 && list->items[tile_cursor - 1]->op == MVM_JIT_ARGLIST, |
1032 | 303k | "CALL must be preceded by ARGLIST"); |
1033 | 303k | /* |
1034 | 303k | * CALL nodes can use values in registers, for example for dynamic |
1035 | 303k | * calls. These registers may conflict with registers used in ARGLIST, |
1036 | 303k | * in which case prepare_arglist_and_call will move the values to a free |
1037 | 303k | * register and update the call tile. |
1038 | 303k | * |
1039 | 303k | * However, as regular register-allocated values, the selected register |
1040 | 303k | * may be allocated for a synthetic LOAD tile after it had previously |
1041 | 303k | * been spilled. To ensure that allocation for the synthetic tile does |
1042 | 303k | * not overwrite the free register picked by the resolution code, we |
1043 | 303k | * must be sure that prepare_arglist_and_call will be run *after* all |
1044 | 303k | * registers have been allocated for the values used by the CALL tile. |
1045 | 303k | * |
1046 | 303k | * That is why prepare_arglist_and_call, which handles BOTH tiles, is |
1047 | 303k | * called for the CALL tile and not for the ARGLIST tile. |
1048 | 303k | */ |
1049 | 303k | |
1050 | 303k | prepare_arglist_and_call(tc, alc, list, arglist_idx, call_idx); |
1051 | 6.19M | } else { |
1052 | 6.19M | MVMint32 i; |
1053 | 6.19M | /* deal with 'use' registers */ |
1054 | 7.23M | for (i = 1; i < tile->num_refs; i++) { |
1055 | 1.03M | MVMint8 spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, i); |
1056 | 1.03M | if (MVM_JIT_REGISTER_IS_USED(spec) && MVM_JIT_REGISTER_HAS_REQUIREMENT(spec)) { |
1057 | 0 | /* we could use the register map here, but let's wait and see */ |
1058 | 0 | NYI(tile_use_requirements); |
1059 | 0 | } |
1060 | 1.03M | } |
1061 | 6.19M | } |
1062 | 6.80M | } |
1063 | | |
1064 | 3.57M | static void process_live_range(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 v) { |
1065 | 3.57M | MVMint8 reg; |
1066 | 3.57M | MVMint32 tile_order_nr = alc->values[v].start; |
1067 | 3.57M | if (MVM_JIT_REGISTER_HAS_REQUIREMENT(alc->values[v].register_spec)) { |
1068 | 1.42M | reg = MVM_JIT_REGISTER_REQUIREMENT(alc->values[v].register_spec); |
1069 | 1.42M | if (MVM_bitmap_get_low(NVR_GPR_BITMAP, reg)) { |
1070 | 1.42M | assign_register(tc, alc, list, v, MVM_JIT_STORAGE_NVR, reg); |
1071 | 0 | } else { |
1072 | 0 | /* TODO; might require swapping / spilling */ |
1073 | 0 | NYI(general_purpose_register_spec); |
1074 | 0 | } |
1075 | 2.15M | } else { |
1076 | 2.15M | while ((reg = get_register(tc, alc, MVM_JIT_STORAGE_GPR)) < 0) { |
1077 | 90 | /* choose a live range, a register to spill, and a spill location */ |
1078 | 90 | MVMint32 to_spill = select_live_range_for_spill(tc, alc, list, tile_order_nr); |
1079 | 90 | MVMint32 spill_pos = MVM_jit_spill_memory_select(tc, alc->compiler, alc->values[to_spill].reg_type); |
1080 | 90 | active_set_splice(tc, alc, to_spill); |
1081 | 90 | _DEBUG("Spilling live range %d at %d to %d to free up a register", |
1082 | 90 | to_spill, tile_order_nr, spill_pos); |
1083 | 90 | live_range_spill(tc, alc, list, to_spill, spill_pos, tile_order_nr); |
1084 | 90 | } |
1085 | 2.15M | assign_register(tc, alc, list, v, MVM_JIT_STORAGE_GPR, reg); |
1086 | 2.15M | active_set_add(tc, alc, v); |
1087 | 2.15M | } |
1088 | 3.57M | |
1089 | 3.57M | } |
1090 | | |
1091 | 253k | static void linear_scan(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) { |
1092 | 253k | MVMint32 tile_cursor = 0; |
1093 | 253k | MVM_VECTOR_INIT(alc->retired, alc->worklist_num); |
1094 | 253k | MVM_VECTOR_INIT(alc->spilled, 8); |
1095 | 253k | _DEBUG("STARTING LINEAR SCAN: %d/%d", tc->instance->jit_seq_nr, list->tree->seq_nr); |
1096 | 253k | /* loop over all tiles and peek on the value heap */ |
1097 | 10.6M | while (tile_cursor < list->items_num) { |
1098 | 10.3M | |
1099 | 10.3M | if (alc->worklist_num > 0 && live_range_heap_peek(alc->values, alc->worklist) < order_nr(tile_cursor)) { |
1100 | 3.57M | /* we've processed all tiles prior to this live range */ |
1101 | 3.57M | MVMint32 live_range = live_range_heap_pop(alc->values, alc->worklist, &alc->worklist_num, values_cmp_first_ref); |
1102 | 3.57M | MVMint32 tile_order_nr = alc->values[live_range].start; |
1103 | 3.57M | _DEBUG("Processing live range %d (first ref %d, last ref %d)", |
1104 | 3.57M | live_range, alc->values[live_range].start, alc->values[live_range].end); |
1105 | 3.57M | active_set_expire(tc, alc, tile_order_nr); |
1106 | 3.57M | /* We can release the spill slot only if there is no more active |
1107 | 3.57M | * live range overlapping with its extent. Otherwise, when we reuse |
1108 | 3.57M | * the slot, we risk overwriting a useful value. |
1109 | 3.57M | * |
1110 | 3.57M | * We pass the current tile_order_nr as the upper bound (e.g. even when
1111 | 3.57M | * there are no active live ranges, slots will still be useful if
1112 | 3.57M | * they have later uses). */
1113 | 3.57M | spill_set_release(tc, alc, earliest_active_value(tc,alc, tile_order_nr)); |
1114 | 3.57M | process_live_range(tc, alc, list, live_range); |
1115 | 6.80M | } else { |
1116 | 6.80M | /* still have tiles to process, increment cursor */ |
1117 | 6.80M | process_tile(tc, alc, list, tile_cursor++); |
1118 | 6.80M | } |
1119 | 10.3M | } |
1120 | 253k | |
1121 | 253k | /* flush active live ranges */ |
1122 | 253k | active_set_expire(tc, alc, order_nr(list->items_num) + 1); |
1123 | 253k | spill_set_release(tc, alc, order_nr(list->items_num) + 1); |
1124 | 253k | _DEBUG("END OF LINEAR SCAN%s","\n"); |
1125 | 253k | } |
1126 | | |
1127 | | |
1128 | 253k | void MVM_jit_linear_scan_allocate(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTileList *list) { |
1129 | 253k | RegisterAllocator alc; |
1130 | 253k | /* initialize allocator */ |
1131 | 253k | alc.compiler = compiler; |
1132 | 253k | /* restart spill stack */ |
1133 | 253k | |
1134 | 253k | alc.active_top = 0; |
1135 | 253k | memset(alc.active, -1, sizeof(alc.active)); |
1136 | 253k | |
1137 | 253k | alc.reg_give = alc.reg_take = 0; |
1138 | 253k | memcpy(alc.reg_ring, available_gpr, |
1139 | 253k | sizeof(available_gpr)); |
1140 | 253k | |
1141 | 253k | /* run algorithm */ |
1142 | 253k | determine_live_ranges(tc, &alc, list); |
1143 | 253k | linear_scan(tc, &alc, list); |
1144 | 253k | |
1145 | 253k | /* deinitialize allocator */ |
1146 | 253k | MVM_free(alc.sets); |
1147 | 253k | MVM_free(alc.refs); |
1148 | 253k | MVM_free(alc.holes); |
1149 | 253k | MVM_free(alc.values); |
1150 | 253k | |
1151 | 253k | MVM_free(alc.worklist); |
1152 | 253k | MVM_free(alc.retired); |
1153 | 253k | MVM_free(alc.spilled); |
1154 | 253k | |
1155 | 253k | |
1156 | 253k | /* make edits effective */ |
1157 | 253k | MVM_jit_tile_list_edit(tc, list); |
1158 | 253k | |
1159 | 253k | } |