Coverage Report

Created: 2018-07-03 15:31

/home/travis/build/MoarVM/MoarVM/src/jit/linear_scan.c
Line | Count | Source
1
#include "moar.h"
2
#include "internal.h"
3
4
#define __COMMA__ ,
5
static MVMint8 available_gpr[] = {
6
    MVM_JIT_ARCH_AVAILABLE_GPR(MVM_JIT_REG)
7
};
8
static MVMint8 available_num[] = {
9
    MVM_JIT_ARCH_NUM(MVM_JIT_REG)
10
};
11
/* bitmap, so make it '|' to combine the shifted register numbers */
12
#undef __COMMA__
13
#define __COMMA__ |
14
#define SHIFT(x) (1 << (MVM_JIT_REG(x)))
15
static const MVMBitmap NVR_GPR_BITMAP = MVM_JIT_ARCH_NONVOLATILE_GPR(SHIFT);
16
static const MVMBitmap AVAILABLE_GPR_BITMAP = MVM_JIT_ARCH_AVAILABLE_GPR(SHIFT);
17
#undef SHIFT
18
#undef __COMMA__
19
20
21
#define MAX_ACTIVE sizeof(available_gpr)
22
0
#define NYI(x) MVM_oops(tc, #x " not yet implemented")
23
2.87M
#define _ASSERT(b, msg) if (!(b)) do { MVM_panic(1, msg); } while (0)
24
25
#if MVM_JIT_DEBUG
26
#define _DEBUG(fmt, ...) do { fprintf(stderr, fmt "%s", __VA_ARGS__, "\n"); } while(0)
27
#else
28
19.5M
#define _DEBUG(fmt, ...) do {} while(0)
29
#endif
30
31
32
typedef struct {
33
    MVMint32 key;
34
    MVMint32 idx;
35
} UnionFind;
36
37
typedef struct ValueRef ValueRef;
38
struct ValueRef {
39
    MVMint32  tile_idx;
40
    MVMint32  value_idx;
41
    ValueRef *next;
42
};
43
44
struct Hole {
45
    MVMint32 start, end;
46
    struct Hole *next;
47
};
48
49
typedef struct {
50
    /* double-ended queue of value refs */
51
    ValueRef *first, *last;
52
    /* order number of first and last refs */
53
    MVMint32 start, end;
54
55
    /* list of holes in ascending order */
56
    struct Hole *holes;
57
58
    /* We can have at most two synthetic tiles, one attached to the first
59
     * definition and one to the last use... we could also point directly into
60
     * the values array of the tile, but it is not directly necessary */
61
    MVMJitTile *synthetic[2];
62
63
    MVMint8            register_spec;
64
    MVMJitStorageClass reg_cls;
65
    MVMint32           reg_num;
66
    MVMint8            reg_type;
67
68
69
    MVMint32           spill_pos;
70
    MVMint32           spill_idx;
71
} LiveRange;
72
73
74
typedef struct {
75
    MVMJitCompiler *compiler;
76
77
    /* Sets of values */
78
    UnionFind *sets;
79
80
    /* single buffer for uses, definitions */
81
    ValueRef *refs;
82
    MVMint32  refs_num;
83
84
    /* single buffer for number of holes */
85
    struct Hole *holes;
86
    MVMint32 holes_top;
87
88
    /* All values ever defined by the register allocator */
89
    MVM_VECTOR_DECL(LiveRange, values);
90
91
    /* 'Currently' active values */
92
    MVMint32 active_top;
93
    MVMint32 active[MAX_ACTIVE];
94
95
    /* Values still left to do (heap) */
96
    MVM_VECTOR_DECL(MVMint32, worklist);
97
    /* Retired values (to be assigned registers) (heap) */
98
    MVM_VECTOR_DECL(MVMint32, retired);
99
    /* Spilled values */
100
    MVM_VECTOR_DECL(MVMint32, spilled);
101
102
103
    /* Register handout ring */
104
    MVMint8   reg_ring[MAX_ACTIVE];
105
    MVMint32  reg_give, reg_take;
106
107
} RegisterAllocator;
108
109
110
/* For first/last ref comparison, the tile indexes are doubled, and the indexes
111
 * of synthetics are biased with +1/-1. We use this extra space on the number
112
 * line to ensure consistent ordering and expiring behavior for 'synthetic' live
113
 * ranges that either start before an instruction (loading a required value) or
114
 * end just after one (storing the produced value). Without this, ordering
115
 * problems can cause two 'atomic' live ranges to be allocated and expired
116
 * before their actual last use */
117
45.1M
MVM_STATIC_INLINE MVMint32 order_nr(MVMint32 tile_idx) {
118
45.1M
    return tile_idx * 2;
119
45.1M
}
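To make the biasing concrete, a minimal sketch (the tile index is hypothetical) of where a tile and its synthetic neighbours land on the order-number line; the -1/+1 offsets below match what insert_load_before_use and insert_store_after_definition assign further down:

    MVMint32 tile_idx = 5;                          /* hypothetical tile index */
    MVMint32 tile_nr  = order_nr(tile_idx);         /* 10: the tile itself */
    MVMint32 load_nr  = order_nr(tile_idx) - 1;     /*  9: synthetic load inserted before it */
    MVMint32 store_nr = order_nr(tile_idx) + 1;     /* 11: synthetic store inserted after it */
    /* a synthetic load therefore expires strictly before the tile that uses
     * its value, and a synthetic store starts strictly after the tile that
     * defines it, which keeps allocation and expiry ordering consistent */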
120
121
122
484k
MVM_STATIC_INLINE MVMint32 is_definition(ValueRef *v) {
123
484k
    return (v->value_idx == 0);
124
484k
}
125
126
9.12M
MVM_STATIC_INLINE MVMint32 is_arglist_ref(MVMJitTileList *list, ValueRef *v) {
127
9.12M
    return (list->items[v->tile_idx]->op == MVM_JIT_ARGLIST);
128
9.12M
}
129
130
131
1.55M
MVM_STATIC_INLINE MVMint32 live_range_is_empty(LiveRange *range) {
132
1.55M
    return (range->first == NULL &&
133
100k
            range->synthetic[0] == NULL &&
134
100k
            range->synthetic[1] == NULL);
135
1.55M
}
136
137
/* allocate a new live range value by pointer-bumping */
138
3.96M
MVMint32 live_range_init(RegisterAllocator *alc) {
139
3.96M
    LiveRange *range;
140
3.96M
    MVMint32 idx = alc->values_num++;
141
3.96M
    MVM_VECTOR_ENSURE_SIZE(alc->values, idx);
142
3.96M
    alc->values[idx].spill_idx = INT32_MAX;
143
3.96M
    alc->values[idx].start     = INT32_MAX;
144
3.96M
    return idx;
145
3.96M
}
146
147
148
/* append ref to end of queue */
149
8.08M
static void live_range_add_ref(RegisterAllocator *alc, LiveRange *range, MVMint32 tile_idx, MVMint32 value_idx) {
150
8.08M
    ValueRef *ref = alc->refs + alc->refs_num++;
151
8.08M
152
8.08M
    ref->tile_idx  = tile_idx;
153
8.08M
    ref->value_idx = value_idx;
154
8.08M
155
8.08M
    if (range->first == NULL) {
156
3.47M
        range->first = ref;
157
3.47M
    }
158
8.08M
    if (range->last != NULL) {
159
4.61M
        range->last->next = ref;
160
4.61M
    }
161
8.08M
    range->last = ref;
162
8.08M
    ref->next   = NULL;
163
8.08M
164
8.08M
    range->start = MIN(order_nr(tile_idx), range->start);
165
8.08M
    range->end   = MAX(order_nr(tile_idx), range->end);
166
8.08M
}
167
168
169
/* merge value ref sets */
170
100k
static void live_range_merge(LiveRange *a, LiveRange *b) {
171
100k
    ValueRef *head = NULL, *tail = NULL;
172
100k
    MVMint32 i;
173
100k
    _DEBUG("Merging live ranges (%d-%d) and (%d-%d)",
174
100k
           (a)->start, (a)->end, (b)->start, (b)->end);
175
100k
    if (a->start <= b->start) {
176
100k
        head = a->first;
177
100k
        a->first = a->first->next;
178
0
    } else {
179
0
        head = b->first;
180
0
        b->first = b->first->next;
181
0
    }
182
100k
    tail = head;
183
210k
    while (a->first != NULL && b->first != NULL) {
184
110k
        if (a->first->tile_idx <= b->first->tile_idx) {
185
110k
            tail->next  = a->first;
186
110k
            a->first    = a->first->next;
187
0
        } else {
188
0
            tail->next  = b->first;
189
0
            b->first    = b->first->next;
190
0
        }
191
110k
        tail = tail->next;
192
110k
    }
193
100k
    while (a->first != NULL) {
194
0
        tail->next = a->first;
195
0
        a->first   = a->first->next;
196
0
        tail       = tail->next;
197
0
    }
198
246k
    while (b->first != NULL) {
199
145k
        tail->next  = b->first;
200
145k
        b->first    = b->first->next;
201
145k
        tail        = tail->next;
202
145k
    }
203
100k
204
100k
    a->first = head;
205
100k
    a->last  = tail;
206
100k
207
300k
    for (i = 0; i < 2; i++) {
208
200k
        if (b->synthetic[i] == NULL) {
209
200k
            continue;
210
200k
        }
211
0
        if (a->synthetic[i] != NULL) {
212
0
            MVM_panic(1, "Can't merge the same synthetic!");
213
0
        }
214
0
    }
215
100k
    a->start = MIN(a->start, b->start);
216
100k
    a->end   = MAX(a->end, b->end);
217
100k
    /* deinitialize the live range */
218
100k
    b->start = INT32_MAX;
219
100k
    b->end   = 0;
220
100k
}
221
222
105k
static struct Hole * live_range_has_hole(LiveRange *value, MVMint32 order_nr) {
223
105k
    struct Hole *h;
224
105k
    /* By construction these are in linear ascending order, and never overlap */
225
124k
    for (h = value->holes; h != NULL && h->start <= order_nr; h = h->next) {
226
58.0k
        if (h->end >= order_nr)
227
38.5k
            return h;
228
58.0k
    }
229
66.5k
    return NULL;
230
105k
}
231
232
233
10.5M
UnionFind * value_set_find(UnionFind *sets, MVMint32 key) {
234
12.4M
    while (sets[key].key != key) {
235
1.90M
        key = sets[key].key;
236
1.90M
    }
237
10.5M
    return sets + key;
238
10.5M
}
239
240
241
100k
MVMint32 value_set_union(UnionFind *sets, LiveRange *values, MVMint32 a, MVMint32 b) {
242
100k
    /* dereference the sets to their roots */
243
100k
    a = value_set_find(sets, a)->key;
244
100k
    b = value_set_find(sets, b)->key;
245
100k
    if (a == b) {
246
0
        /* secretly the same set anyway, could happen in some combinations of
247
0
         * IF, COPY, and DO. */
248
0
        return a;
249
0
    }
250
100k
    if (values[sets[b].idx].start < values[sets[a].idx].start) {
251
0
        /* ensure we're picking the first one to start so that we maintain the
252
0
         * first-definition heap order */
253
0
        MVMint32 t = a; a = b; b = t;
254
0
    }
255
100k
    sets[b].key = a; /* point b to a */
256
100k
    live_range_merge(values + sets[a].idx, values + sets[b].idx);
257
100k
    return a;
258
100k
}
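A minimal usage sketch (node and index numbers are hypothetical) of how these helpers tie aliasing tiles to a single live range, mirroring what determine_live_ranges does below for COPY, DO and IF:

    UnionFind sets[16] = { 0 };
    sets[7].key = 7;  sets[7].idx = 3;   /* node 7: a real definition, live range 3 */
    sets[9].key = 7;                     /* node 9: a COPY, aliased straight to node 7 */
    /* any reference through the COPY resolves to the same live range */
    MVMint32 idx = value_set_find(sets, 9)->idx;   /* == 3 */
    /* an IF would instead call value_set_union(sets, values, left, right),
     * merging the two branch values into one range */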
259
260
261
8.11M
MVM_STATIC_INLINE void heap_swap(MVMint32 *heap, MVMint32 a, MVMint32 b) {
262
8.11M
    MVMint32 t = heap[a];
263
8.11M
    heap[a]    = heap[b];
264
8.11M
    heap[b]    = t;
265
8.11M
}
266
267
/* Functions to maintain a heap of references to the live ranges */
268
void live_range_heap_down(LiveRange *values, MVMint32 *heap, MVMint32 top, MVMint32 item,
269
3.64M
                          MVMint32 (*cmp)(LiveRange *values, MVMint32 a, MVMint32 b)) {
270
11.5M
    while (item < top) {
271
11.2M
        MVMint32 left = item * 2 + 1;
272
11.2M
        MVMint32 right = left + 1;
273
11.2M
        MVMint32 swap;
274
11.2M
        if (right < top) {
275
4.60M
            swap = cmp(values, heap[left], heap[right]) < 0 ? left : right;
276
3.43M
        } else if (left < top) {
277
645k
            swap = left;
278
2.79M
        } else {
279
2.79M
            break;
280
2.79M
        }
281
8.45M
        if (cmp(values, heap[swap], heap[item]) < 0) {
282
7.90M
            heap_swap(heap, swap, item);
283
7.90M
            item       = swap;
284
552k
        } else {
285
552k
            break;
286
552k
        }
287
8.45M
    }
288
3.64M
}
289
290
void live_range_heap_up(LiveRange *values, MVMint32 *heap, MVMint32 item,
291
268k
                        MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) {
292
479k
    while (item > 0) {
293
407k
        MVMint32 parent = (item-1)/2;
294
407k
        if (cmp(values, heap[parent], heap[item]) > 0) {
295
211k
            heap_swap(heap, item, parent);
296
211k
            item = parent;
297
196k
        } else {
298
196k
            break;
299
196k
        }
300
407k
    }
301
268k
}
302
303
MVMint32 live_range_heap_pop(LiveRange *values, MVMint32 *heap, size_t *top,
304
3.64M
                             MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) {
305
3.64M
    MVMint32 v = heap[0];
306
3.64M
    MVMint32 t = --(*top);
307
3.64M
    /* pop by swap and heap-down */
308
3.64M
    heap[0]    = heap[t];
309
3.64M
    live_range_heap_down(values, heap, t, 0, cmp);
310
3.64M
    return v;
311
3.64M
}
312
313
void live_range_heap_push(LiveRange *values, MVMint32 *heap, size_t *top, MVMint32 v,
314
268k
                          MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) {
315
268k
    /* NB, caller should use MVM_ENSURE_SPACE prior to calling */
316
268k
    MVMint32 t = (*top)++;
317
268k
    heap[t] = v;
318
268k
    live_range_heap_up(values, heap, t, cmp);
319
268k
}
320
321
9.54M
MVMint32 live_range_heap_peek(LiveRange *values, MVMint32 *heap) {
322
9.54M
    return values[heap[0]].start;
323
9.54M
}
324
325
void live_range_heapify(LiveRange *values, MVMint32 *heap, MVMint32 top,
326
0
                        MVMint32 (*cmp)(LiveRange* values, MVMint32 a, MVMint32 b)) {
327
0
    MVMint32 i = top, mid = top/2;
328
0
    while (i-- > mid) {
329
0
        live_range_heap_up(values, heap, i, cmp);
330
0
    }
331
0
}
332
333
334
16.6M
MVMint32 values_cmp_first_ref(LiveRange *values, MVMint32 a, MVMint32 b) {
335
16.6M
    return values[a].start - values[b].start;
336
16.6M
}
337
338
24.6k
MVMint32 values_cmp_last_ref(LiveRange *values, MVMint32 a, MVMint32 b) {
339
24.6k
    return values[a].end - values[b].end;
340
24.6k
}
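A small sketch (hypothetical start positions) of how the heap helpers and the comparators above combine, the same way linear_scan below drains its worklist in ascending order of first reference:

    LiveRange values[8] = { 0 };
    values[0].start = 4;  values[2].start = 10;  values[5].start = 2;
    MVMint32 heap[8];
    size_t   top = 0;
    live_range_heap_push(values, heap, &top, 0, values_cmp_first_ref);
    live_range_heap_push(values, heap, &top, 2, values_cmp_first_ref);
    live_range_heap_push(values, heap, &top, 5, values_cmp_first_ref);
    /* pops return 5, then 0, then 2: ascending order of .start */
    MVMint32 next = live_range_heap_pop(values, heap, &top, values_cmp_first_ref);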
341
342
/* register assignment logic */
343
4.30M
#define NEXT_IN_RING(a,x) (((x)+1) == MVM_ARRAY_SIZE(a) ? 0 : ((x)+1))
344
2.15M
MVMint8 get_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls) {
345
2.15M
    /* ignore storage class for now */
346
2.15M
    MVMint8 reg_num;
347
2.15M
    reg_num       = alc->reg_ring[alc->reg_take];
348
2.15M
    if (reg_num >= 0) {
349
2.15M
        /* not empty */
350
2.15M
        alc->reg_ring[alc->reg_take] = -1; /* mark used */
351
2.15M
        alc->reg_take = NEXT_IN_RING(alc->reg_ring, alc->reg_take);
352
2.15M
    }
353
2.15M
    return reg_num;
354
2.15M
}
355
356
2.15M
void free_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitStorageClass reg_cls, MVMint8 reg_num) {
357
2.15M
    if (alc->reg_ring[alc->reg_give] != -1) {
358
0
        MVM_oops(tc, "No space to release register %d to ring", reg_num);
359
0
    }
360
2.15M
    alc->reg_ring[alc->reg_give] = reg_num;
361
2.15M
    alc->reg_give = NEXT_IN_RING(alc->reg_ring, alc->reg_give);
362
2.15M
}
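A minimal sketch (register numbers are hypothetical) of the hand-out ring these two functions implement; a released register lands in the slot left empty by an earlier take, so the ring never overflows:

    MVMint8  ring[4] = { 3, 5, 8, 9 };   /* free GPR numbers, as the allocator
                                          * copies from available_gpr */
    MVMint32 give = 0, take = 0;
    MVMint8  r  = ring[take];            /* hand out register 3 */
    ring[take]  = -1;                    /* mark the slot empty */
    take        = NEXT_IN_RING(ring, take);
    /* ... later, when the live range holding r expires ... */
    ring[give]  = r;                     /* the give cursor points at the -1 slot */
    give        = NEXT_IN_RING(ring, give);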
363
364
void assign_register(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list,
365
3.86M
                     MVMint32 lv, MVMJitStorageClass reg_cls,  MVMint8 reg_num) {
366
3.86M
    /* What to do here:
367
3.86M
     * - update tiles using this live range to refer to this register
368
3.86M
     * - update allocator to mark this register as used by this live range */
369
3.86M
    LiveRange *range = alc->values + lv;
370
3.86M
    ValueRef *ref;
371
3.86M
    MVMint32 i;
372
3.86M
373
3.86M
    range->reg_cls   = reg_cls;
374
3.86M
    range->reg_num   = reg_num;
375
12.4M
    for (ref = range->first; ref != NULL; ref = ref->next) {
376
8.57M
        if (is_arglist_ref(list, ref)) {
377
1.13M
            /* don't assign registers to ARGLIST references, that will never
378
1.13M
             * work */
379
1.13M
            continue;
380
7.43M
        } else {
381
7.43M
            MVMJitTile *tile = list->items[ref->tile_idx];
382
7.43M
            tile->values[ref->value_idx] = reg_num;
383
7.43M
        }
384
8.57M
    }
385
3.86M
386
11.5M
    for (i = 0; i < 2; i++) {
387
7.72M
        MVMJitTile *tile = range->synthetic[i];
388
7.72M
        if (tile != NULL) {
389
484k
            tile->values[i] = reg_num;
390
484k
        }
391
7.72M
    }
392
3.86M
}
393
394
395
1.80M
MVM_STATIC_INLINE void close_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) {
396
1.80M
    LiveRange *v = alc->values + ref;
397
1.80M
    if (v->holes && v->holes->start < order_nr(tile_idx)) {
398
94.1k
        v->holes->start = order_nr(tile_idx);
399
94.1k
        _DEBUG("Closed hole in live range %d at %d", ref, order_nr(tile_idx));
400
94.1k
    }
401
1.80M
}
402
403
1.55M
MVM_STATIC_INLINE void open_hole(RegisterAllocator *alc, MVMint32 ref, MVMint32 tile_idx) {
404
1.55M
    LiveRange *v = alc->values + ref;
405
1.55M
    if (v->start < order_nr(tile_idx) &&
406
100k
        (v->holes == NULL || v->holes->start > order_nr(tile_idx))) {
407
100k
        struct Hole *hole = alc->holes + alc->holes_top++;
408
100k
        hole->next  = v->holes;
409
100k
        hole->start = 0;
410
100k
        hole->end   = order_nr(tile_idx);
411
100k
        v->holes    = hole;
412
100k
        _DEBUG("Opened hole in live range %d at %d", ref, order_nr(tile_idx));
413
100k
    }
414
1.55M
}
415
416
/* Find holes in live ranges, as per Wimmer (2010). This is required only
417
 * because the spill-strategy around CALLs is (sometimes) to load-and-restore,
418
 * rather than do a full spill, in the not-so-rare case that many of the live
419
 * values will be temporaries and the call is only necessary in a branch */
420
421
58.8k
static void find_holes(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) {
422
58.8k
    MVMint32 i, j, k;
423
58.8k
424
58.8k
    MVMint32 bitmap_size = (alc->values_num >> 5) + 1;
425
58.8k
426
58.8k
 /* convenience macros */
427
1.39M
#define _BITMAP(_a)   (bitmaps + (_a)*bitmap_size)
428
58.8k
#define _SUCC(_a, _z) (list->blocks[(_a)].succ[(_z)])
429
58.8k
430
58.8k
    MVMBitmap *bitmaps = MVM_calloc(list->blocks_num + 1, sizeof(MVMBitmap) * bitmap_size);
431
58.8k
    /* last bitmap is allocated to hold diff, which is how we know which live
432
58.8k
     * ranges' holes potentially need to be closed */
433
58.8k
    MVMBitmap *diff    = _BITMAP(list->blocks_num);
434
58.8k
435
634k
    for (j = list->blocks_num - 1; j >= 0; j--) {
436
575k
        MVMBitmap *live_in = _BITMAP(j);
437
575k
        MVMint32 start = list->blocks[j].start, end = list->blocks[j].end;
438
575k
        if (list->blocks[j].num_succ == 2) {
439
238k
            /* live out is union of successors' live_in */
440
238k
            MVMBitmap *a = _BITMAP(_SUCC(j, 0)), *b =  _BITMAP(_SUCC(j, 1));
441
238k
442
238k
            MVM_bitmap_union(live_in, a, b, bitmap_size);
443
238k
            MVM_bitmap_difference(diff, a, b, bitmap_size);
444
238k
445
670k
            for (k = 0; k < bitmap_size; k++) {
446
432k
                MVMBitmap additions = diff[k];
447
763k
                while (additions) {
448
331k
                    MVMint32 bit = MVM_FFS(additions) - 1;
449
331k
                    MVMint32 val = (k << 6) + bit;
450
331k
                    close_hole(alc, val, end);
451
331k
                    MVM_bitmap_delete(&additions, bit);
452
331k
                }
453
432k
            }
454
337k
        } else if (list->blocks[j].num_succ == 1) {
455
278k
            memcpy(live_in, _BITMAP(_SUCC(j, 0)),
456
278k
                   sizeof(MVMBitmap) * bitmap_size);
457
278k
        }
458
575k
459
4.05M
        for (i = end - 1; i >= start; i--) {
460
3.48M
            MVMJitTile *tile = list->items[i];
461
3.48M
            if (tile->op == MVM_JIT_ARGLIST) {
462
135k
                /* list of uses, all very real */
463
135k
                MVMint32 nchild = list->tree->nodes[tile->node + 1];
464
585k
                for (k = 0; k < nchild; k++) {
465
449k
                    MVMint32 carg = list->tree->nodes[tile->node + 2 + k];
466
449k
                    MVMint32 ref  = value_set_find(alc->sets, list->tree->nodes[carg + 1])->idx;
467
449k
                    if (!MVM_bitmap_get(live_in, ref)) {
468
427k
                        MVM_bitmap_set(live_in, ref);
469
427k
                        close_hole(alc, ref, i);
470
427k
                    }
471
449k
                }
472
3.34M
            } else if (tile->op == MVM_JIT_IF || tile->op == MVM_JIT_DO || tile->op == MVM_JIT_COPY) {
473
295k
                /* not a real use, no work needed here (we already merged them) */
474
3.05M
            } else {
475
3.05M
                /* If a value is used and defined by the same tile, then the
476
3.05M
                 * 'hole' only covers that single tile. The definitions must
477
3.05M
                 * therefore be handled before the uses */
478
3.05M
479
3.05M
                if (MVM_JIT_TILE_YIELDS_VALUE(tile)) {
480
1.55M
                    MVMint32 ref = value_set_find(alc->sets, tile->node)->idx;
481
1.55M
                    open_hole(alc, ref, i);
482
1.55M
                    MVM_bitmap_delete(live_in, ref);
483
1.55M
                }
484
5.18M
                for (k = 0; k < tile->num_refs; k++) {
485
2.12M
                    if (MVM_JIT_REGISTER_IS_USED(MVM_JIT_REGISTER_FETCH(tile->register_spec, k+1))) {
486
1.74M
                        MVMint32 ref = value_set_find(alc->sets, tile->refs[k])->idx;
487
1.74M
                        if (!MVM_bitmap_get(live_in, ref)) {
488
1.04M
                            MVM_bitmap_set(live_in, ref);
489
1.04M
                            close_hole(alc, ref, i);
490
1.04M
                        }
491
1.74M
                    }
492
2.12M
                }
493
3.05M
            }
494
3.48M
        }
495
575k
    }
496
58.8k
    MVM_free(bitmaps);
497
58.8k
498
58.8k
#undef _BITMAP
499
58.8k
#undef _SUCC
500
58.8k
}
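For illustration, a minimal sketch (all positions hypothetical) of the hole representation this pass produces and how it is consumed: a value that is dead across a branch gets a hole covering it, so a CALL inside that branch does not force a spill (see prepare_arglist_and_call below):

    struct Hole hole = { 0 };
    hole.start = 24;                /* order nr of the last use before the gap */
    hole.end   = 36;                /* order nr of the next definition, where the gap ends */

    LiveRange range = { 0 };
    range.start = 10;
    range.end   = 50;
    range.holes = &hole;

    /* a CALL at order nr 30 falls inside the hole, so the value survives the
     * call without being spilled */
    struct Hole *h = live_range_has_hole(&range, 30);   /* h == &hole */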
501
502
503
253k
static void determine_live_ranges(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) {
504
253k
    MVMJitExprTree *tree = list->tree;
505
253k
    MVMint32 i, j, n;
506
253k
    MVMint32 num_phi = 0; /* pessimistic but correct upper bound of number of holes */
507
253k
508
253k
    alc->sets = MVM_calloc(tree->nodes_num, sizeof(UnionFind));
509
253k
    /* up to 4 refs per tile (1 out, 3 in) plus the number of refs per arglist */
510
253k
    alc->refs = MVM_calloc(list->items_num * 4 + list->num_arglist_refs, sizeof(ValueRef));
511
253k
    alc->refs_num = 0;
512
253k
513
253k
    MVM_VECTOR_INIT(alc->values,   list->items_num);
514
253k
    MVM_VECTOR_INIT(alc->worklist, list->items_num);
515
253k
516
7.05M
    for (i = 0; i < list->items_num; i++) {
517
6.80M
        MVMJitTile *tile = list->items[i];
518
6.80M
        MVMint32    node = tile->node;
519
6.80M
        /* Each of the following counts as either an alias or as a PHI (in case
520
6.80M
         * of IF), and thus these are not actual definitions */
521
6.80M
        if (tile->op == MVM_JIT_COPY) {
522
189k
            MVMint32 ref        = tree->nodes[tile->node + 1];
523
189k
            _DEBUG("Unify COPY node (%d -> %d)", tile->node, ref);
524
189k
            alc->sets[node].key = ref; /* point directly to actual definition */
525
6.61M
        } else if (tile->op == MVM_JIT_DO) {
526
111k
            MVMint32 nchild     = tree->nodes[tile->node + 1];
527
111k
            MVMint32 ref        = tree->nodes[tile->node + 1 + nchild];
528
111k
            _DEBUG("Unify COPY DO (%d -> %d)", tile->node, ref);
529
111k
            alc->sets[node].key = ref;
530
6.50M
        } else if (tile->op == MVM_JIT_IF) {
531
100k
            MVMint32 left_cond   = tree->nodes[tile->node + 2];
532
100k
            MVMint32 right_cond  = tree->nodes[tile->node + 3];
533
100k
            /* NB; this may cause a conflict in register requirements, in which
534
100k
             * case we should resolve it by creating a new live range or inserting
535
100k
             * a copy */
536
100k
            alc->sets[node].key  = value_set_union(alc->sets, alc->values, left_cond, right_cond);
537
100k
            _DEBUG("Merging nodes %d and %d to %d (result key = %d)", left_cond, right_cond, node, alc->sets[node].key);
538
100k
            num_phi++;
539
6.40M
        } else if (tile->op == MVM_JIT_ARGLIST) {
540
303k
            MVMint32 num_args = tree->nodes[tile->node + 1];
541
303k
            MVMJitExprNode *refs = tree->nodes + tile->node + 2;
542
303k
            _DEBUG("Adding %d references to ARGLIST node", num_args);
543
1.41M
            for (j = 0; j < num_args; j++) {
544
1.11M
                MVMint32 carg  = refs[j];
545
1.11M
                MVMint32 value = list->tree->nodes[carg+1];
546
1.11M
                MVMint32 idx   = value_set_find(alc->sets, value)->idx;
547
1.11M
                _DEBUG("  Reference %d", idx);
548
1.11M
                live_range_add_ref(alc, alc->values + idx, i, j + 1);
549
1.11M
                /* include the CALL node into the arglist child range, so we
550
1.11M
                 * don't release them too early */
551
1.11M
                alc->values[idx].end = MAX(alc->values[idx].end, order_nr(i + 1));
552
1.11M
            }
553
6.09M
        } else {
554
6.09M
            /* create a live range if necessary */
555
6.09M
            if (MVM_JIT_TILE_YIELDS_VALUE(tile)) {
556
3.47M
                MVMint8 register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, 0);
557
3.47M
                MVMint32 idx          = live_range_init(alc);
558
3.47M
                alc->sets[node].key   = node;
559
3.47M
                alc->sets[node].idx   = idx;
560
3.47M
                _DEBUG("Create live range %d (tile=%d, node=%d)", idx,i, node);
561
3.47M
                live_range_add_ref(alc, alc->values + idx, i, 0);
562
3.47M
                if (MVM_JIT_REGISTER_HAS_REQUIREMENT(register_spec)) {
563
1.42M
                    alc->values[idx].register_spec = register_spec;
564
1.42M
                }
565
3.47M
                MVM_VECTOR_PUSH(alc->worklist, idx);
566
3.47M
            }
567
6.09M
            /* account for uses */
568
10.4M
            for (j = 0; j < tile->num_refs; j++) {
569
4.33M
                MVMint8  register_spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, j+1);
570
4.33M
                /* any 'use' register requirements are handled in the allocation step */
571
4.33M
                if (MVM_JIT_REGISTER_IS_USED(register_spec)) {
572
3.50M
                    MVMint32 idx = value_set_find(alc->sets, tile->refs[j])->idx;
573
3.50M
                    _DEBUG("Adding reference to live range %d from tile %d", idx, i);
574
3.50M
                    live_range_add_ref(alc, alc->values + idx, i, j + 1);
575
3.50M
                }
576
4.33M
            }
577
6.09M
        }
578
6.80M
        if (MVM_JIT_TILE_YIELDS_VALUE(tile) && tree->info[node].opr_type != 0) {
579
888k
            LiveRange *range = alc->values + value_set_find(alc->sets, node)->idx;
580
888k
            _ASSERT(range->reg_type == 0 || (range->reg_type << 3) == tree->info[node].opr_type,
581
888k
                    "Register types do not match between value and node");
582
888k
            /* shift to match MVM_reg_types. should arguably be a macro maybe */
583
888k
            range->reg_type = tree->info[node].opr_type >> 3;
584
888k
            _DEBUG( "Assigned type: %d to live range %d\n", range->reg_type, range - alc->values);
585
888k
        }
586
6.80M
    }
587
253k
    if (num_phi > 0) {
588
58.8k
        /* If there are PHI nodes, there will be holes.
589
58.8k
         * The array allocated here will be used to construct linked lists */
590
58.8k
        alc->holes     = MVM_malloc(num_phi * sizeof(struct Hole));
591
58.8k
        alc->holes_top = 0;
592
58.8k
        find_holes(tc, alc, list);
593
58.8k
594
58.8k
        /* eliminate empty values from the worklist */
595
1.61M
        for (i = 0, j = 0; j < alc->worklist_num; j++) {
596
1.55M
            if (!live_range_is_empty(alc->values + alc->worklist[j])) {
597
1.45M
                alc->worklist[i++] = alc->worklist[j];
598
1.45M
            }
599
1.55M
        }
600
58.8k
        alc->worklist_num = i;
601
58.8k
602
194k
    } else {
603
194k
        alc->holes = NULL;
604
194k
        alc->holes_top = 0;
605
194k
    }
606
253k
}
607
608
/* The code below needs some thinking... */
609
2.15M
static void active_set_add(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 a) {
610
2.15M
    /* the original linear-scan heuristic for spilling is to take the last value
611
2.15M
     * in the set to expire, freeing up the largest extent of code... that is a
612
2.15M
     * reasonably good heuristic, albeit not essential to the concept of linear
613
2.15M
     * scan. It makes sense to keep the active set ordered at all times (simplest by
614
2.15M
     * use of insertion sort). Although insertion sort is O(n^2), n is never
615
2.15M
     * large in this case (32 for RISC architectures, maybe, if we ever support
616
2.15M
     * them; 7 for x86-64). So the time spent on insertion sort is always small
617
2.15M
     * and bounded by a constant, hence O(1). Yes, algorithmics works this way
618
2.15M
     * :-) */
619
2.15M
    MVMint32 i;
620
3.65M
    for (i = 0; i < alc->active_top; i++) {
621
2.03M
        MVMint32 b = alc->active[i];
622
2.03M
        if (alc->values[b].end > alc->values[a].end) {
623
534k
            /* insert a before b */
624
534k
            memmove(alc->active + i + 1, alc->active + i, sizeof(MVMint32)*(alc->active_top - i));
625
534k
            alc->active[i] = a;
626
534k
            alc->active_top++;
627
534k
            return;
628
534k
        }
629
2.03M
    }
630
2.15M
    /* append at the end */
631
1.61M
    alc->active[alc->active_top++] = a;
632
1.61M
}
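Keeping the set ordered this way also makes the spill choice trivial; a small illustration (hypothetical last-use positions) of the invariant that active_set_expire and select_live_range_for_spill below rely on:

    /* values[active[i]].end for a hypothetical active set, already sorted */
    MVMint32 active_ends[4] = { 12, 30, 44, 90 };
    /* expiring at order nr 35 drops the first two entries (ends 12 and 30);
     * when a register must be freed instead, the spill candidate is simply
     * active[active_top - 1], the range that would hold its register longest */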
633
634
635
636
/* Take live ranges from the active set whose last use was at or before the given position and append them to the retired list */
637
3.83M
static void active_set_expire(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) {
638
3.83M
    MVMint32 i;
639
5.91M
    for (i = 0; i < alc->active_top; i++) {
640
3.89M
        MVMint32 v = alc->active[i];
641
3.89M
        MVMint8 reg_num = alc->values[v].reg_num;
642
3.89M
        if (alc->values[v].end > order_nr) {
643
1.81M
            break;
644
2.08M
        } else {
645
2.08M
            _DEBUG("Live range %d is out of scope (last ref %d, %d) and releasing register %d",
646
2.08M
                   v, alc->values[v].end, order_nr, reg_num);
647
2.08M
            free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_num);
648
2.08M
        }
649
3.89M
    }
650
3.83M
    /* shift off the first i (expired) values from the active set. */
651
3.83M
    if (i > 0) {
652
1.39M
        MVM_VECTOR_APPEND(alc->retired, alc->active, i);
653
1.39M
        alc->active_top -= i;
654
1.39M
        if (alc->active_top > 0) {
655
410k
            memmove(alc->active, alc->active + i,
656
410k
                    sizeof(alc->active[0]) * alc->active_top);
657
410k
        }
658
1.39M
    }
659
3.83M
}
660
661
/* Compute the earliest live range that is still active. */
662
3.57M
static MVMint32 earliest_active_value(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 tile_order_nr) {
663
3.57M
    /* can we cache this, and does it make sense to do so? */
664
3.57M
    int i;
665
6.87M
    for (i = 0; i < alc->active_top; i++) {
666
3.29M
        tile_order_nr = MIN(tile_order_nr, alc->values[alc->active[i]].start);
667
3.29M
    }
668
3.57M
    return tile_order_nr;
669
3.57M
}
670
671
90
static void active_set_splice(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 to_splice) {
672
90
    MVMint32 i ;
673
90
    /* find (reverse, because it's usually the last); predecrement alc->active_top because we're removing one item */
674
90
    for (i = --alc->active_top; i >= 0; i--) {
675
90
        if (alc->active[i] == to_splice)
676
90
            break;
677
90
    }
678
90
    if (i >= 0 && i < alc->active_top) {
679
0
        /* shift out */
680
0
        memmove(alc->active + i, alc->active + i + 1,
681
0
                sizeof(alc->active[0]) * (alc->active_top - i));
682
0
    }
683
90
}
684
685
3.83M
static void spill_set_release(MVMThreadContext *tc, RegisterAllocator *alc, MVMint32 order_nr) {
686
3.89M
    while (alc->spilled_num > 0 && alc->values[alc->spilled[0]].end <= order_nr) {
687
66.6k
        MVMint32 spilled = live_range_heap_pop(alc->values, alc->spilled, &alc->spilled_num,
688
66.6k
                                               values_cmp_last_ref);
689
66.6k
        LiveRange *value = alc->values + spilled;
690
66.6k
        _DEBUG("VM Register %d for live range %d can be released",
691
66.6k
               value->spill_pos / sizeof(MVMRegister), spilled);
692
66.6k
        MVM_jit_spill_memory_release(tc, alc->compiler, value->spill_pos, value->reg_type);
693
66.6k
    }
694
3.83M
}
695
696
static MVMint32 insert_load_before_use(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list,
697
397k
                                       ValueRef *ref, MVMint32 load_pos) {
698
397k
    MVMint32 n = live_range_init(alc);
699
397k
    MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_load, 2, 1,
700
397k
                                         MVM_JIT_STORAGE_LOCAL, load_pos, 0);
701
397k
    LiveRange *range = alc->values + n;
702
397k
    MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx - 1, +1); /* insert just prior to use */
703
397k
    range->synthetic[0] = tile;
704
397k
    range->first = range->last = ref;
705
397k
706
397k
    range->start = order_nr(ref->tile_idx) - 1;
707
397k
    range->end   = order_nr(ref->tile_idx);
708
397k
    return n;
709
397k
}
710
711
static MVMint32 insert_store_after_definition(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list,
712
86.0k
                                              ValueRef *ref, MVMint32 store_pos) {
713
86.0k
    MVMint32 n       = live_range_init(alc);
714
86.0k
    MVMJitTile *tile = MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_store, 2, 2,
715
86.0k
                                         MVM_JIT_STORAGE_LOCAL, store_pos, 0, 0);
716
86.0k
    LiveRange *range  = alc->values + n;
717
86.0k
    MVM_jit_tile_list_insert(tc, list, tile, ref->tile_idx, -1); /* insert just after the definition */
718
86.0k
    range->synthetic[1] = tile;
719
86.0k
    range->first = range->last = ref;
720
86.0k
721
86.0k
    range->start = order_nr(ref->tile_idx);
722
86.0k
    range->end   = order_nr(ref->tile_idx) + 1;
723
86.0k
    return n;
724
86.0k
}
725
726
90
static MVMint32 select_live_range_for_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 code_pos) {
727
90
    return alc->active[alc->active_top-1];
728
90
}
729
730
731
static void live_range_spill(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list,
732
66.6k
                             MVMint32 to_spill, MVMint32 spill_pos, MVMint32 code_pos) {
733
66.6k
734
66.6k
    MVMint8 reg_spilled = alc->values[to_spill].reg_num;
735
66.6k
    /* loop over all value refs */
736
66.6k
    _DEBUG("Spilling live range value %d to memory position %d at %d", to_spill, spill_pos, code_pos);
737
66.6k
738
623k
    while (alc->values[to_spill].first != NULL) {
739
557k
        /* make a new live range */
740
557k
        MVMint32 n;
741
557k
742
557k
        /* shift current ref */
743
557k
        ValueRef *ref = alc->values[to_spill].first;
744
557k
        alc->values[to_spill].first = ref->next;
745
557k
        ref->next = NULL;
746
557k
747
557k
        if (is_arglist_ref(list, ref) && order_nr(ref->tile_idx) > code_pos) {
748
73.0k
            /* Never insert a load before a future ARGLIST; ARGLIST may easily
749
73.0k
             * consume more registers than we have available. Past ARGLISTs have
750
73.0k
             * already been handled, so we do need to insert a load before
751
73.0k
             * them (or modify in place, but, complex!). */
752
73.0k
            continue;
753
484k
        } else if (is_definition(ref)) {
754
86.0k
            n = insert_store_after_definition(tc, alc, list, ref, spill_pos);
755
397k
        } else {
756
397k
            n = insert_load_before_use(tc, alc, list, ref, spill_pos);
757
397k
        }
758
557k
759
484k
        if (order_nr(ref->tile_idx) < code_pos) {
760
282k
            /* in the past, which means we can safely use the spilled register
761
282k
             * and immediately retire this live range */
762
282k
            assign_register(tc, alc, list, n, MVM_JIT_STORAGE_GPR, reg_spilled);
763
282k
            MVM_VECTOR_PUSH(alc->retired, n);
764
201k
        } else {
765
201k
            /* in the future, which means we need to add it to the worklist */
766
201k
            MVM_VECTOR_ENSURE_SPACE(alc->worklist, 1);
767
201k
            live_range_heap_push(alc->values, alc->worklist, &alc->worklist_num, n,
768
201k
                                 values_cmp_first_ref);
769
201k
        }
770
484k
    }
771
66.6k
772
66.6k
    /* clear value references */
773
66.6k
    alc->values[to_spill].last = NULL;
774
66.6k
775
66.6k
    /* mark as spilled and store the spill position */
776
66.6k
    alc->values[to_spill].spill_pos = spill_pos;
777
66.6k
    alc->values[to_spill].spill_idx = code_pos;
778
66.6k
    free_register(tc, alc, MVM_JIT_STORAGE_GPR, reg_spilled);
779
66.6k
    MVM_VECTOR_ENSURE_SPACE(alc->spilled, 1);
780
66.6k
    live_range_heap_push(alc->values, alc->spilled, &alc->spilled_num,
781
66.6k
                         to_spill, values_cmp_last_ref);
782
66.6k
}
783
784
785
static void prepare_arglist_and_call(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list,
786
303k
                                     MVMint32 arglist_idx, MVMint32 call_idx) {
787
303k
    MVMJitTile *arglist_tile = list->items[arglist_idx],
788
303k
                  *call_tile = list->items[call_idx];
789
303k
    MVMJitExprTree *tree = list->tree;
790
303k
    MVMint32 num_args = tree->nodes[arglist_tile->node + 1];
791
303k
    MVMint32           arg_values[16];
792
303k
    MVMJitStorageRef storage_refs[16];
793
303k
794
303k
    struct {
795
303k
        MVMint8 in_reg; /* register number that is to be moved in */
796
303k
        MVMint8 num_out; /* how many values need to make room for me */
797
303k
    } topological_map[MVM_JIT_ARCH_NUM_GPR]; /* reverse map for topological sort of moves */
798
303k
799
303k
    struct {
800
303k
        MVMint8 reg_num;
801
303k
        MVMint8 stack_pos;
802
303k
    } stack_transfer[16];
803
303k
    MVMint32 stack_transfer_top = 0;
804
303k
805
303k
    MVMint8 transfer_queue[16];
806
303k
    MVMint32 transfer_queue_idx, transfer_queue_top = 0, transfers_required = 0;
807
303k
808
303k
    MVMint8 spilled_args[16];
809
303k
    MVMint32 spilled_args_top = 0;
810
303k
811
303k
    MVMBitmap call_bitmap = 0, arg_bitmap = 0;
812
303k
813
303k
    MVMint8 spare_register;
814
303k
815
303k
    MVMint32 i, j, ins_pos = 2;
816
303k
817
303k
    /* get storage positions for arglist */
818
303k
    MVM_jit_arch_storage_for_arglist(tc, alc->compiler, tree, arglist_tile->node, storage_refs);
819
303k
820
303k
    /* get value refs for arglist */
821
1.41M
    for (i = 0; i < num_args; i++) {
822
1.11M
        MVMint32 carg  = tree->nodes[arglist_tile->node + 2 + i];
823
1.11M
        /* may refer to spilled live range */
824
1.11M
        arg_values[i] = value_set_find(alc->sets, tree->nodes[carg + 1])->idx;
825
1.11M
    }
826
303k
827
303k
    _DEBUG("prepare_call: Got %d args", num_args);
828
303k
829
303k
    /* initialize topological map, use -1 as 'undefined' inbound value */
830
5.16M
    for (i = 0; i < MVM_ARRAY_SIZE(topological_map); i++) {
831
4.86M
        topological_map[i].num_out =  0;
832
4.86M
        topological_map[i].in_reg  = -1;
833
4.86M
    }
834
303k
835
1.14M
    for (i = 0, j = 0; i < alc->active_top; i++) {
836
843k
        LiveRange *v = alc->values + alc->active[i];
837
843k
        MVMint32 code_pos = order_nr(call_idx);
838
843k
        if (v->end > code_pos && live_range_has_hole(v, code_pos) == NULL) {
839
66.5k
            /* surviving values need to be spilled */
840
66.5k
            MVMint32 spill_pos = MVM_jit_spill_memory_select(tc, alc->compiler, v->reg_type);
841
66.5k
            /* spilling at the CALL idx will mean that the spiller inserts a
842
66.5k
             * LOAD at the current register before the ARGLIST, meaning it
843
66.5k
             * remains 'live' for this ARGLIST */
844
66.5k
            _DEBUG("Spilling %d to %d at %d", alc->active[i], spill_pos, code_pos);
845
66.5k
            live_range_spill(tc, alc, list, alc->active[i], spill_pos, code_pos);
846
776k
        } else {
847
776k
            /* compact the active set */
848
776k
            alc->active[j++] = alc->active[i];
849
776k
        }
850
843k
    }
851
303k
    alc->active_top = j;
852
303k
853
1.41M
    for (i = 0; i < num_args; i++) {
854
1.11M
        LiveRange *v = alc->values + arg_values[i];
855
1.11M
        if (v->spill_idx < order_nr(call_idx)) {
856
72.9k
            /* spilled prior to the ARGLIST/CALL */
857
72.9k
            spilled_args[spilled_args_top++] = i;
858
1.03M
        } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) {
859
1.00M
            MVMint8 reg_num = storage_refs[i]._pos;
860
1.00M
            if (reg_num != v->reg_num) {
861
922k
                _DEBUG("Transfer Rq(%d) -> Rq(%d)", reg_num, v->reg_num);
862
922k
                topological_map[reg_num].in_reg = v->reg_num;
863
922k
                topological_map[v->reg_num].num_out++;
864
922k
                transfers_required++;
865
86.2k
            } else {
866
86.2k
                _DEBUG("Transfer Rq(%d) not required", reg_num);
867
86.2k
            }
868
28.3k
        } else if (storage_refs[i]._cls == MVM_JIT_STORAGE_STACK) {
869
28.3k
            /* enqueue for stack transfer */
870
28.3k
            stack_transfer[stack_transfer_top].reg_num = v->reg_num;
871
28.3k
            stack_transfer[stack_transfer_top].stack_pos = storage_refs[i]._pos;
872
28.3k
            stack_transfer_top++;
873
28.3k
            /* count the outbound edge */
874
28.3k
            topological_map[v->reg_num].num_out++;
875
0
        } else {
876
0
            NYI(this_storage_class);
877
0
        }
878
1.11M
879
1.11M
        /* set bitmap */
880
1.11M
        if (storage_refs[i]._cls == MVM_JIT_STORAGE_GPR) {
881
1.08M
            MVMint8 reg_num = storage_refs[i]._pos;
882
1.08M
            MVM_bitmap_set(&arg_bitmap, reg_num);
883
1.08M
        }
884
1.11M
    }
885
303k
886
303k
    _DEBUG("%d transfers required", transfers_required);
887
303k
888
1.11M
#define INSERT_TILE(_tile, _pos, _order) MVM_jit_tile_list_insert(tc, list, _tile, _pos, _order)
889
1.11M
#define INSERT_NEXT_TILE(_tile) INSERT_TILE(_tile, arglist_idx, ins_pos++)
890
303k
#define MAKE_TILE(_code, _narg, _nval, ...) MVM_jit_tile_make(tc, alc->compiler, MVM_jit_compile_ ## _code, _narg, _nval, __VA_ARGS__)
891
303k
892
1.01M
#define INSERT_MOVE(_a, _b)          INSERT_NEXT_TILE(MAKE_TILE(move, 0, 2, _a, _b))
893
28.3k
#define INSERT_COPY_TO_STACK(_s, _r) INSERT_NEXT_TILE(MAKE_TILE(store, 2, 2, MVM_JIT_STORAGE_STACK, _s, 0, _r))
894
72.9k
#define INSERT_LOAD_LOCAL(_r, _l)    INSERT_NEXT_TILE(MAKE_TILE(load, 2, 1, MVM_JIT_STORAGE_LOCAL, _l, _r))
895
303k
#define INSERT_LOCAL_STACK_COPY(_s, _l) \
896
0
    INSERT_NEXT_TILE(MAKE_TILE(memory_copy, 4, 2, MVM_JIT_STORAGE_STACK, _s, MVM_JIT_STORAGE_LOCAL, _l, 0, spare_register))
897
303k
898
945k
#define ENQUEUE_TRANSFER(_r) do { \
899
945k
         MVMint8 _c = (_r); \
900
945k
         if (--(topological_map[(_c)].num_out) == 0 &&  \
901
945k
                topological_map[(_c)].in_reg   >= 0) { \
902
311k
             transfer_queue[transfer_queue_top++] = _c; \
903
311k
         } \
904
945k
     } while(0)
905
303k
906
303k
907
303k
    /* resolve conflicts for CALL; since we're spilling any surviving bits,
908
303k
     * we can just move it to any free register. */
909
712k
    for (i = 0; i < call_tile->num_refs; i++) {
910
408k
        MVMint8 spec = MVM_JIT_REGISTER_FETCH(call_tile->register_spec, i + 1);
911
408k
        if (MVM_JIT_REGISTER_IS_USED(spec)) {
912
104k
            MVMint8 reg = call_tile->values[i+1];
913
104k
            MVM_bitmap_set(&call_bitmap, reg);
914
104k
        }
915
408k
    }
916
303k
917
363k
    while (call_bitmap & arg_bitmap) {
918
59.5k
        MVMuint32 free_reg = ~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP);
919
59.5k
        /* FFS counts registers starting from 1 */
920
59.5k
        MVMuint8 src = MVM_FFS(call_bitmap & arg_bitmap) - 1;
921
59.5k
        MVMuint8 dst = MVM_FFS(free_reg) - 1;
922
59.5k
923
59.5k
        _ASSERT(free_reg != 0, "JIT: need to move a register but nothing is free");
924
59.5k
925
59.5k
        /* add edge */
926
59.5k
        topological_map[dst].in_reg = src;
927
59.5k
        topological_map[src].num_out++;
928
59.5k
929
59.5k
        /* update bitmap */
930
59.5k
        MVM_bitmap_delete(&call_bitmap, src);
931
59.5k
        MVM_bitmap_set(&call_bitmap, dst);
932
59.5k
933
59.5k
        /* update CALL args */
934
178k
        for (i = 0; i < call_tile->num_refs; i++) {
935
119k
            if (call_tile->values[i+1] == src) {
936
59.5k
                call_tile->values[i+1] = dst;
937
59.5k
            }
938
119k
        }
939
59.5k
    }
940
303k
941
303k
942
303k
    /* at this point, all outbound edges have been created, and none have been
943
303k
     * processed yet, so we can enqueue all 'free' transfers */
944
5.16M
    for (i = 0; i < MVM_ARRAY_SIZE(topological_map); i++) {
945
4.86M
        if (topological_map[i].num_out == 0 &&
946
3.85M
            topological_map[i].in_reg >= 0) {
947
606k
            _DEBUG("Directly transfer %d -> %d", topological_map[i].in_reg, i);
948
606k
            transfer_queue[transfer_queue_top++] = i;
949
606k
        }
950
4.86M
    }
951
303k
952
303k
    /* with call_bitmap and arg_bitmap given, we can determine the spare
953
303k
     * register used for allocation; NB this may only be necessary in some
954
303k
     * cases */
955
303k
    spare_register = MVM_FFS(~(call_bitmap | arg_bitmap | NVR_GPR_BITMAP)) - 1;
956
303k
    _ASSERT(spare_register >= 0, "JIT: No spare register for moves");
957
303k
958
332k
    for (i = 0; i < stack_transfer_top; i++) {
959
28.3k
        MVMint8 reg_num = stack_transfer[i].reg_num;
960
28.3k
        MVMint8 stk_pos = stack_transfer[i].stack_pos;
961
28.3k
        INSERT_COPY_TO_STACK(stk_pos, reg_num);
962
28.3k
        _DEBUG("Insert stack parameter: Rq(%d) -> [rsp+%d]", reg_num, stk_pos);
963
28.3k
        ENQUEUE_TRANSFER(reg_num);
964
28.3k
    }
965
303k
966
303k
967
1.22M
    for (transfer_queue_idx = 0; transfer_queue_idx < transfer_queue_top; transfer_queue_idx++) {
968
917k
        MVMint8 dst = transfer_queue[transfer_queue_idx];
969
917k
        MVMint8 src = topological_map[dst].in_reg;
970
917k
        _ASSERT(src >= 0, "No inboud edge (sort)");
971
917k
        _DEBUG("Insert move (toposort): Rq(%d) -> Rq(%d)", src, dst);
972
917k
        INSERT_MOVE(dst, src);
973
917k
        ENQUEUE_TRANSFER(src);
974
917k
    }
975
303k
976
303k
    if (transfer_queue_top < transfers_required) {
977
32.1k
        /* suppose we have a cycle of transfers: a -> b -> c -> a;
978
32.1k
         * since we only keep the one inbound register as a reference, the chain
979
32.1k
         * is really:
980
32.1k
         * a -> c -> b -> a
981
32.1k
         * We can 'break' this cycle by walking the chain in this order, first
982
32.1k
         * moving 'a' out of the way into a spare register, then moving c to a,
983
32.1k
         * b to c, and finally moving the spare register to 'b'
984
32.1k
         */
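        /* A worked example (register names hypothetical): say the values must
         * move r1 -> r2, r2 -> r3, r3 -> r1, so in_reg[r2] = r1,
         * in_reg[r3] = r2 and in_reg[r1] = r3. Starting the walk at r1 emits:
         *     move spare <- r1
         *     move r1    <- r3
         *     move r3    <- r2
         *     move r2    <- spare
         * after which every register holds the value destined for it and every
         * num_out counter has been decremented back to zero. */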
985
546k
        for (i = 0; i < MVM_JIT_ARCH_NUM_GPR; i++) {
986
514k
            if (topological_map[i].num_out > 0) {
987
32.1k
                MVMint8 src, dst;
988
32.1k
                INSERT_MOVE(spare_register, i);
989
32.1k
                _ASSERT(--(topological_map[i].num_out) == 0,
990
32.1k
                        "More than one outbound edge in cycle");
991
64.6k
                for (dst = i, src = topological_map[i].in_reg; src != i;
992
32.5k
                     dst = src, src = topological_map[src].in_reg) {
993
32.5k
                    _ASSERT(src >= 0, "No inbound edge (cycle breaking)");
994
32.5k
                    INSERT_MOVE(dst, src);
995
32.5k
                    _ASSERT(--(topological_map[src].num_out) == 0,
996
32.5k
                            "More than one outbound edge in cycle");
997
32.5k
                }
998
32.1k
                INSERT_MOVE(dst, spare_register);
999
32.1k
            }
1000
514k
        }
1001
32.1k
    }
1002
303k
1003
303k
    /* now all that remains is to deal with spilled values */
1004
376k
    for (i = 0; i < spilled_args_top; i++) {
1005
72.9k
        MVMint32 arg = spilled_args[i];
1006
72.9k
        LiveRange *v = alc->values + arg_values[arg];
1007
72.9k
        if (storage_refs[arg]._cls == MVM_JIT_STORAGE_GPR) {
1008
72.9k
            _DEBUG("Loading spilled value to Rq(%d) from [rbx+%d]", storage_refs[arg]._pos, v->spill_pos);
1009
72.9k
            INSERT_LOAD_LOCAL(storage_refs[arg]._pos, v->spill_pos);
1010
0
        } else if (storage_refs[arg]._cls == MVM_JIT_STORAGE_STACK) {
1011
0
            INSERT_LOCAL_STACK_COPY(storage_refs[arg]._pos, v->spill_pos);
1012
0
        } else {
1013
0
            NYI(storage_class);
1014
0
        }
1015
72.9k
    }
1016
303k
}
1017
1018
6.80M
MVM_STATIC_INLINE void process_tile(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 tile_cursor) {
1019
6.80M
    MVMJitTile *tile = list->items[tile_cursor];
1020
6.80M
1021
6.80M
    if (tile->op == MVM_JIT_ARGLIST) {
1022
303k
        MVMint32 arglist_idx = tile_cursor;
1023
303k
        MVMint32 call_idx    = tile_cursor + 1;
1024
303k
        _ASSERT(call_idx < list->items_num &&
1025
303k
                (list->items[call_idx]->op == MVM_JIT_CALL ||
1026
303k
                 list->items[call_idx]->op == MVM_JIT_CALLV),
1027
303k
                "ARGLIST tiles must be followed by CALL");
1028
6.49M
    } else if (tile->op == MVM_JIT_CALL || tile->op == MVM_JIT_CALLV) {
1029
303k
        MVMint32 arglist_idx = tile_cursor - 1;
1030
303k
        MVMint32 call_idx    = tile_cursor;
1031
303k
        _ASSERT(tile_cursor > 0 && list->items[tile_cursor - 1]->op == MVM_JIT_ARGLIST,
1032
303k
                "CALL must be preceded by ARGLIST");
1033
303k
        /*
1034
303k
         * CALL nodes can use values in registers, for example for dynamic
1035
303k
         * calls. These registers may conflict with registers used in ARGLIST,
1036
303k
         * in which case prepare_arglist_and_call will move the values to a free
1037
303k
         * register and update the call tile.
1038
303k
         *
1039
303k
         * However, as regular register-allocated values, the selected register
1040
303k
         * may be allocated for a synthetic LOAD tile after it had previously
1041
303k
         * been spilled. To ensure that allocation for the synthetic tile does
1042
303k
         * not overwrite the free register picked by the resolution code, we
1043
303k
         * must be sure that prepare_arglist_and_call will be run *after* all
1044
303k
         * registers have been allocated for the values used by the CALL tile.
1045
303k
         *
1046
303k
         * That is why prepare_arglist_and_call, which handles BOTH tiles, is
1047
303k
         * called for the CALL tile and not for the ARGLIST tile.
1048
303k
         */
1049
303k
1050
303k
        prepare_arglist_and_call(tc, alc, list, arglist_idx, call_idx);
1051
6.19M
    } else {
1052
6.19M
        MVMint32 i;
1053
6.19M
        /* deal with 'use' registers */
1054
7.23M
        for  (i = 1; i < tile->num_refs; i++) {
1055
1.03M
            MVMint8 spec = MVM_JIT_REGISTER_FETCH(tile->register_spec, i);
1056
1.03M
            if (MVM_JIT_REGISTER_IS_USED(spec) && MVM_JIT_REGISTER_HAS_REQUIREMENT(spec)) {
1057
0
                /* we could use the register map here, but let's wait and see */
1058
0
                NYI(tile_use_requirements);
1059
0
            }
1060
1.03M
        }
1061
6.19M
    }
1062
6.80M
}
1063
1064
3.57M
static void process_live_range(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list, MVMint32 v) {
1065
3.57M
    MVMint8 reg;
1066
3.57M
    MVMint32 tile_order_nr = alc->values[v].start;
1067
3.57M
    if (MVM_JIT_REGISTER_HAS_REQUIREMENT(alc->values[v].register_spec)) {
1068
1.42M
        reg = MVM_JIT_REGISTER_REQUIREMENT(alc->values[v].register_spec);
1069
1.42M
        if (MVM_bitmap_get_low(NVR_GPR_BITMAP, reg)) {
1070
1.42M
            assign_register(tc, alc, list, v, MVM_JIT_STORAGE_NVR, reg);
1071
0
        } else {
1072
0
            /* TODO; might require swapping / spilling */
1073
0
            NYI(general_purpose_register_spec);
1074
0
        }
1075
2.15M
    } else {
1076
2.15M
        while ((reg = get_register(tc, alc, MVM_JIT_STORAGE_GPR)) < 0) {
1077
90
            /* choose a live range, a register to spill, and a spill location */
1078
90
            MVMint32 to_spill   = select_live_range_for_spill(tc, alc, list, tile_order_nr);
1079
90
            MVMint32 spill_pos  = MVM_jit_spill_memory_select(tc, alc->compiler, alc->values[to_spill].reg_type);
1080
90
            active_set_splice(tc, alc, to_spill);
1081
90
            _DEBUG("Spilling live range %d at %d to %d to free up a register",
1082
90
                   to_spill, tile_order_nr, spill_pos);
1083
90
            live_range_spill(tc, alc, list, to_spill, spill_pos, tile_order_nr);
1084
90
        }
1085
2.15M
        assign_register(tc, alc, list, v, MVM_JIT_STORAGE_GPR, reg);
1086
2.15M
        active_set_add(tc, alc, v);
1087
2.15M
    }
1088
3.57M
1089
3.57M
}
1090
1091
253k
static void linear_scan(MVMThreadContext *tc, RegisterAllocator *alc, MVMJitTileList *list) {
1092
253k
    MVMint32 tile_cursor = 0;
1093
253k
    MVM_VECTOR_INIT(alc->retired, alc->worklist_num);
1094
253k
    MVM_VECTOR_INIT(alc->spilled, 8);
1095
253k
    _DEBUG("STARTING LINEAR SCAN: %d/%d", tc->instance->jit_seq_nr, list->tree->seq_nr);
1096
253k
    /* loop over all tiles and peek on the value heap */
1097
10.6M
    while (tile_cursor < list->items_num) {
1098
10.3M
1099
10.3M
        if (alc->worklist_num > 0 && live_range_heap_peek(alc->values, alc->worklist) < order_nr(tile_cursor)) {
1100
3.57M
            /* we've processed all tiles prior to this live range */
1101
3.57M
            MVMint32 live_range = live_range_heap_pop(alc->values, alc->worklist, &alc->worklist_num, values_cmp_first_ref);
1102
3.57M
            MVMint32 tile_order_nr = alc->values[live_range].start;
1103
3.57M
            _DEBUG("Processing live range %d (first ref %d, last ref %d)",
1104
3.57M
                   live_range, alc->values[live_range].start, alc->values[live_range].end);
1105
3.57M
            active_set_expire(tc, alc, tile_order_nr);
1106
3.57M
            /* We can release the spill slot only if there is no more active
1107
3.57M
             * live range overlapping with its extent. Otherwise, when we reuse
1108
3.57M
             * the slot, we risk overwriting a useful value.
1109
3.57M
             *
1110
3.57M
             * We pass the current tile_order_nr as the upper bound (e.g. when
1111
3.57M
             * there may be no active live ranges, slots will still be useful if
1112
3.57M
             * they have later uses */
1113
3.57M
            spill_set_release(tc, alc,  earliest_active_value(tc,alc, tile_order_nr));
1114
3.57M
            process_live_range(tc, alc, list, live_range);
1115
6.80M
        } else {
1116
6.80M
            /* still have tiles to process, increment cursor */
1117
6.80M
            process_tile(tc, alc, list, tile_cursor++);
1118
6.80M
        }
1119
10.3M
    }
1120
253k
1121
253k
    /* flush active live ranges */
1122
253k
    active_set_expire(tc, alc, order_nr(list->items_num) + 1);
1123
253k
    spill_set_release(tc, alc, order_nr(list->items_num) + 1);
1124
253k
    _DEBUG("END OF LINEAR SCAN%s","\n");
1125
253k
}
1126
1127
1128
253k
void MVM_jit_linear_scan_allocate(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTileList *list) {
1129
253k
    RegisterAllocator alc;
1130
253k
    /* initialize allocator */
1131
253k
    alc.compiler = compiler;
1132
253k
    /* restart spill stack */
1133
253k
1134
253k
    alc.active_top = 0;
1135
253k
    memset(alc.active, -1, sizeof(alc.active));
1136
253k
1137
253k
    alc.reg_give = alc.reg_take = 0;
1138
253k
    memcpy(alc.reg_ring, available_gpr,
1139
253k
           sizeof(available_gpr));
1140
253k
1141
253k
    /* run algorithm */
1142
253k
    determine_live_ranges(tc, &alc, list);
1143
253k
    linear_scan(tc, &alc, list);
1144
253k
1145
253k
    /* deinitialize allocator */
1146
253k
    MVM_free(alc.sets);
1147
253k
    MVM_free(alc.refs);
1148
253k
    MVM_free(alc.holes);
1149
253k
    MVM_free(alc.values);
1150
253k
1151
253k
    MVM_free(alc.worklist);
1152
253k
    MVM_free(alc.retired);
1153
253k
    MVM_free(alc.spilled);
1154
253k
1155
253k
1156
253k
    /* make edits effective */
1157
253k
    MVM_jit_tile_list_edit(tc, list);
1158
253k
1159
253k
}