Coverage Report

Created: 2017-04-15 07:07

/home/travis/build/MoarVM/MoarVM/src/mast/compiler.c
Line
Count
Source (jump to first uncovered line)
1
#include "moar.h"
2
#include "nodes.h"
3
4
/* Fixed values used when laying out the output: the *_SIZE macros give the
 * size of a record or header, the *_HEADER_OFFSET macros give positions
 * within the header. NOTE(review): these are presumably byte counts dictated
 * by the bytecode format -- confirm against the format specification before
 * changing any of them. */
5
#define HEADER_SIZE                 92
6
1.18k
#define BYTECODE_VERSION            5
7
8.08k
#define FRAME_HEADER_SIZE           (11 * 4 + 3 * 2)
8
300
#define FRAME_HANDLER_SIZE          (4 * 4 + 2 * 2)
9
4.97k
#define FRAME_SLV_SIZE              (2 * 2 + 2 * 4)
10
3.35k
#define SC_DEP_SIZE                 4
11
1.18k
#define EXTOP_SIZE                  (4 + 8)
12
2.36k
#define SCDEP_HEADER_OFFSET         12
13
2.36k
#define EXTOP_HEADER_OFFSET         20
14
2.36k
#define FRAME_HEADER_OFFSET         28
15
2.36k
#define CALLSITE_HEADER_OFFSET      36
16
2.36k
#define STRING_HEADER_OFFSET        44
17
0
#define SCDATA_HEADER_OFFSET        52
18
2.36k
#define BYTECODE_HEADER_OFFSET      60
19
2.36k
#define ANNOTATION_HEADER_OFFSET    68
20
1.18k
#define HLL_NAME_HEADER_OFFSET      76
21
3.54k
#define SPECIAL_FRAME_HEADER_OFFSET 80
22
0
#define EXTOP_BASE                  1024
23
24
/* Frame flags. Each value is a single distinct bit, so they can be combined
 * and tested with '&' (get_frame_index tests a frame's flags against
 * FRAME_FLAG_HAS_INDEX this way). */
25
#define FRAME_FLAG_EXIT_HANDLER     1
26
#define FRAME_FLAG_IS_THUNK         2
27
4.04k
#define FRAME_FLAG_HAS_CODE_OBJ     4
28
7.71k
#define FRAME_FLAG_HAS_INDEX        32768
29
4.04k
#define FRAME_FLAG_HAS_SLV          65536
30
31
/* One entry of the callsite reuse hash (callsite_reuse_head in WriterState),
 * used by get_callsite_id to hand out the same ID for identical callsites. */
typedef struct {
32
    /* ID given to the callsite when it was first emitted. */
33
    unsigned short callsite_id;
34
35
    /* Heap-allocated hash key: the callsite's flag bytes followed by the
     * string heap indexes of its named arguments. Freed in cleanup_all. */
36
    unsigned char *identifier;
37
38
    /* the uthash hash handle. */
39
    UT_hash_handle hash_handle;
40
} CallsiteReuseEntry;
41
42
/* Information about a handler. */
43
typedef struct {
44
    /* Offset of start of protected region from frame start. */
45
    unsigned int start_offset;
46
47
    /* Offset of end of protected region, exclusive, from frame start. */
48
    unsigned int end_offset;
49
50
    /* Exception category mask. */
51
    unsigned int category_mask;
52
53
    /* Handler action. */
54
    unsigned short action;
55
56
    /* Local holding block to invoke, if invokey handler. */
57
    unsigned short local;
58
59
    /* Label, which will need resolving. */
60
    MASTNode *label;
61
62
    /* Local holding a label in case we have a labeled loop. */
63
    unsigned short label_reg;
64
} FrameHandler;
65
66
/* Handler actions. NOTE(review): presumably the values stored in the
 * `action` field of FrameHandler -- confirm at the use sites. */
67
217
#define HANDLER_UNWIND_GOTO      0
68
22
#define HANDLER_UNWIND_GOTO_OBJ  1
69
300
#define HANDLER_INVOKE           2
70
71
/* Information about a label. */
72
typedef struct {
73
    MAST_Label *label;           /* The label node this entry describes. */
74
    MVMint32    offset;          /* Offset from the frame's bytecode start. Negative if unknown. */
75
    MVMuint16   num_resolve;     /* Number of entries of `resolve` in use. */
76
    MVMuint16   alloc_resolve;   /* Allocated capacity of `resolve`, in entries; 0 means nothing to free. */
77
    MVMuint32  *resolve;         /* Bytecode segment positions holding a placeholder to patch once the offset is known. */
78
} LabelInfo;
79
80
/* Describes the state for the frame we're currently compiling. */
81
typedef struct {
82
    /* Position of start of bytecode. Label offsets are measured from here. */
83
    unsigned int bytecode_start;
84
85
    /* Position of start of frame entry. */
86
    unsigned int frame_start;
87
88
    /* Types of locals and lexicals, with counts. */
89
    unsigned short *local_types;
90
    unsigned short *lexical_types;
91
    unsigned int num_locals;
92
    unsigned int num_lexicals;
93
94
    /* Number of annotations. */
95
    unsigned int num_annotations;
96
97
    /* Handlers count and list. */
98
    unsigned int num_handlers;
99
    FrameHandler *handlers;
100
101
    /* Labels we have so far (either through finding them or finding a need
102
     * to fix them up). num_labels entries are in use out of alloc_labels
     * allocated. */
103
    LabelInfo *labels;
104
    unsigned int num_labels;
105
    unsigned int alloc_labels;
106
107
    /* Number of label references written as placeholders and not yet
     * patched (counted per reference, not per label). */
108
    unsigned int unresolved_labels;
109
} FrameState;
110
111
/* Describes the current writer state for the compilation unit as a whole. */
112
typedef struct {
113
    /* The set of node types. */
114
    MASTNodeTypes *types;
115
116
    /* The current frame and frame count. cur_frame is released by
     * cleanup_all if it is still set. */
117
    FrameState   *cur_frame;
118
    unsigned int  num_frames;
119
120
    /* String heap and seen hash mapping known strings to indexes. */
121
    MASTNode *strings;
122
    MASTNode *seen_strings;
123
124
    /* The SC dependencies segment; we know the size up front. */
125
    char         *scdep_seg;
126
    unsigned int  scdep_bytes;
127
128
    /* The extension ops segment; we know the size ahead of time. */
129
    char         *extops_seg;
130
    unsigned int  extops_bytes;
131
    unsigned int  num_extops;
132
133
    /* The frame segment: buffer, write position and allocated size. */
134
    char         *frame_seg;
135
    unsigned int  frame_pos;
136
    unsigned int  frame_alloc;
137
138
    /* The callsite segment (buffer, write position, allocated size) and
     * number of callsites emitted so far. */
139
    char         *callsite_seg;
140
    unsigned int  callsite_pos;
141
    unsigned int  callsite_alloc;
142
    unsigned int  num_callsites;
143
144
    /* The bytecode segment: buffer, write position and allocated size. */
145
    char         *bytecode_seg;
146
    unsigned int  bytecode_pos;
147
    unsigned int  bytecode_alloc;
148
149
    /* The annotation segment: buffer, write position and allocated size. */
150
    char         *annotation_seg;
151
    unsigned int  annotation_pos;
152
    unsigned int  annotation_alloc;
153
154
    /* Current instruction info */
155
    const MVMOpInfo    *current_op_info;
156
157
    /* Zero-based index of current frame */
158
    unsigned int  current_frame_idx;
159
160
    /* Zero-based index of MAST instructions */
161
    unsigned int  current_ins_idx;
162
163
    /* Zero-based index of current operand; compile_operand increments it
     * after each operand it compiles. */
164
    unsigned int  current_operand_idx;
165
166
    /* The compilation unit we're compiling. */
167
    MAST_CompUnit *cu;
168
169
    /* Hash from callsite identifier bytes to callsite IDs; entries are
     * CallsiteReuseEntry and are released in cleanup_all. */
170
    CallsiteReuseEntry *callsite_reuse_head;
171
172
    /* Last Annotated node, for error reporting */
173
    MAST_Annotated *last_annotated;
174
} WriterState;
175
176
/* Forward declarations: the file-local (static) helpers, followed by the one
 * non-static function declared here, MVM_mast_compile. */
static unsigned int umax(unsigned int a, unsigned int b);
177
static void memcpy_endian(char *dest, const void *src, size_t size);
178
static void write_int64(char *buffer, size_t offset, unsigned long long value);
179
static void write_int32(char *buffer, size_t offset, unsigned int value);
180
static void write_int16(char *buffer, size_t offset, unsigned short value);
181
static void write_int8(char *buffer, size_t offset, unsigned char value);
182
static void write_double(char *buffer, size_t offset, double value);
183
static void ensure_space(VM, char **buffer, unsigned int *alloc, unsigned int pos, unsigned int need);
184
static void cleanup_frame(VM, FrameState *fs);
185
static void cleanup_all(VM, WriterState *ws);
186
static unsigned int get_string_heap_index(VM, WriterState *ws, VMSTR *strval);
187
static unsigned short get_frame_index(VM, WriterState *ws, MASTNode *frame);
188
static unsigned short type_to_local_type(VM, WriterState *ws, MASTNode *type);
189
static void compile_operand(VM, WriterState *ws, unsigned char op_flags, MASTNode *operand);
190
static unsigned short get_callsite_id(VM, WriterState *ws, MASTNode *flags, MASTNode *args);
191
static void compile_instruction(VM, WriterState *ws, MASTNode *node);
192
static void compile_frame(VM, WriterState *ws, MASTNode *node, unsigned short idx);
193
static char * form_string_heap(VM, WriterState *ws, unsigned int *string_heap_size);
194
static char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size);
195
char * MVM_mast_compile(VM, MASTNode *node, MASTNodeTypes *types, unsigned int *size);
196
197
0
/* Returns the larger of two unsigned integers (b when they are equal). */
static unsigned int umax(unsigned int a, unsigned int b) {
    if (a > b)
        return a;
    return b;
}
200
201
/* Copies `size` bytes from src to dest. When MVM_BIGENDIAN is defined the
 * bytes are stored in reverse order (first source byte lands last);
 * otherwise this is a plain memcpy. */
static void memcpy_endian(char *dest, const void *src, size_t size) {
#ifdef MVM_BIGENDIAN
    const char *in        = (const char *)src;
    char       *out       = dest + size;
    size_t      remaining = size;
    while (remaining--)
        *--out = *in++;
#else
    memcpy(dest, src, size);
#endif
}
212
213
/* Stores the 8 bytes of `value` at buffer + offset, using memcpy_endian for
 * the byte order. The caller must have made room for them. */
static void write_int64(char *buffer, size_t offset, unsigned long long value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 8);
}
217
218
/* Stores the 4 bytes of `value` at buffer + offset, using memcpy_endian for
 * the byte order. The caller must have made room for them. */
static void write_int32(char *buffer, size_t offset, unsigned int value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 4);
}
222
223
/* Stores the 2 bytes of `value` at buffer + offset, using memcpy_endian for
 * the byte order. The caller must have made room for them. */
static void write_int16(char *buffer, size_t offset, unsigned short value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 2);
}
227
228
/* Stores the single byte `value` at buffer + offset. The caller must have
 * made room for it. */
static void write_int8(char *buffer, size_t offset, unsigned char value) {
    unsigned char *target = (unsigned char *)(buffer + offset);
    *target = value;
}
232
233
/* Stores the 8 bytes of the double `value` at buffer + offset, using
 * memcpy_endian for the byte order. The caller must have made room for
 * them. */
static void write_double(char *buffer, size_t offset, double value) {
    char *target = buffer + offset;
    memcpy_endian(target, &value, 8);
}
237
238
/* Ensures the specified buffer has enough space and expands it if so. */
239
860k
static void ensure_space(VM, char **buffer, unsigned int *alloc, unsigned int pos, unsigned int need) {
240
860k
    if (pos + need > *alloc) {
241
2.59k
        do { *alloc = *alloc * 2; } while (pos + need > *alloc);
242
2.59k
        *buffer = (char *)MVM_realloc(*buffer, *alloc);
243
2.59k
    }
244
860k
}
245
246
/* Cleans up all allocated memory related to a frame. */
247
4.04k
static void cleanup_frame(VM, FrameState *fs) {
248
4.04k
    if (fs->local_types)
249
4.04k
        MVM_free(fs->local_types);
250
4.04k
    if (fs->lexical_types)
251
4.04k
        MVM_free(fs->lexical_types);
252
4.04k
    if (fs->handlers)
253
77
        MVM_free(fs->handlers);
254
4.04k
    if (fs->labels) {
255
1.62k
        MVMuint32 i;
256
22.2k
        for (i = 0; i < fs->num_labels; i++)
257
20.5k
            if (fs->labels[i].alloc_resolve)
258
0
                MVM_free(fs->labels[i].resolve);
259
1.62k
        MVM_free(fs->labels);
260
1.62k
    }
261
4.04k
    MVM_free(fs);
262
4.04k
}
263
264
/* Cleans up all allocated memory related to this compilation. */
265
1.18k
static void cleanup_all(VM, WriterState *ws) {
266
1.18k
    CallsiteReuseEntry *current, *tmp;
267
1.18k
    unsigned bucket_tmp;
268
1.18k
    if (ws->cur_frame)
269
0
        cleanup_frame(vm, ws->cur_frame);
270
1.18k
    if (ws->scdep_seg)
271
1.10k
        MVM_free(ws->scdep_seg);
272
1.18k
    if (ws->extops_seg)
273
1.18k
        MVM_free(ws->extops_seg);
274
1.18k
    if (ws->frame_seg)
275
1.18k
        MVM_free(ws->frame_seg);
276
1.18k
    if (ws->callsite_seg)
277
1.18k
        MVM_free(ws->callsite_seg);
278
1.18k
    if (ws->bytecode_seg)
279
1.18k
        MVM_free(ws->bytecode_seg);
280
1.18k
    if (ws->annotation_seg)
281
1.18k
        MVM_free(ws->annotation_seg);
282
3.39k
    HASH_ITER(hash_handle, ws->callsite_reuse_head, current, tmp, bucket_tmp) {
283
3.39k
        MVM_free(current->identifier);
284
3.39k
    }
285
1.18k
    MVM_HASH_DESTROY(hash_handle, CallsiteReuseEntry, ws->callsite_reuse_head);
286
1.18k
    MVM_free(ws);
287
1.18k
}
288
289
/* Gets the index of a string already in the string heap, or
290
 * adds it to the heap if it's not already there. */
291
65.1k
static unsigned int get_string_heap_index(VM, WriterState *ws, VMSTR *strval) {
292
65.1k
    if (EXISTSKEY(vm, ws->seen_strings, strval)) {
293
32.6k
        return (unsigned int)ATKEY_I(vm, ws->seen_strings, strval);
294
32.6k
    }
295
32.5k
    else {
296
32.5k
        unsigned int index = (unsigned int)ELEMS(vm, ws->strings);
297
32.5k
        if (index >= 0x7FFFFFFF) {
298
0
            cleanup_all(vm, ws);
299
0
            DIE(vm, "Too many strings in compilation unit");
300
0
        }
301
32.5k
        BINDPOS_S(vm, ws->strings, index, strval);
302
32.5k
        BINDKEY_I(vm, ws->seen_strings, strval, index);
303
32.5k
        return index;
304
32.5k
    }
305
65.1k
}
306
307
/* Locates the index of a frame. */
308
6.18k
static unsigned short get_frame_index(VM, WriterState *ws, MASTNode *frame) {
309
6.18k
    if (((MAST_Frame *)frame)->flags & FRAME_FLAG_HAS_INDEX) {
310
6.18k
        return (short)((MAST_Frame *)frame)->index;
311
6.18k
    }
312
0
    else {
313
0
        int num_frames = ELEMS(vm, ws->cu->frames);
314
0
        unsigned short i;
315
0
        for (i = 0; i < num_frames; i++)
316
0
            if (ATPOS(vm, ws->cu->frames, i) == frame)
317
0
                return i;
318
0
        cleanup_all(vm, ws);
319
0
        DIE(vm, "MAST::Frame passed for code ref not found in compilation unit");
320
0
    }
321
6.18k
}
322
323
/* Takes a 6model object type and turns it into a local/lexical type flag. */
324
58.6k
static unsigned short type_to_local_type(VM, WriterState *ws, MASTNode *type) {
325
58.6k
    const MVMStorageSpec *ss;
326
58.6k
    if (VM_OBJ_IS_NULL(type))
327
0
        return MVM_reg_obj;
328
58.6k
    ss = REPR(type)->get_storage_spec(vm, STABLE(type));
329
58.6k
    if (ss->inlineable) {
330
26.1k
        switch (ss->boxed_primitive) {
331
22.1k
            case MVM_STORAGE_SPEC_BP_INT:
332
22.1k
                if (ss->is_unsigned) {
333
0
                    switch (ss->bits) {
334
0
                        case 8:
335
0
                            return MVM_reg_uint8;
336
0
                        case 16:
337
0
                            return MVM_reg_uint16;
338
0
                        case 32:
339
0
                            return MVM_reg_uint32;
340
0
                        case 64:
341
0
                            return MVM_reg_uint64;
342
0
                        default:
343
0
                            cleanup_all(vm, ws);
344
0
                            DIE(vm, "Invalid int size for local/lexical");
345
0
                    }
346
0
                }
347
22.1k
                else {
348
22.1k
                    switch (ss->bits) {
349
2
                        case 8:
350
2
                            return MVM_reg_int8;
351
2
                        case 16:
352
2
                            return MVM_reg_int16;
353
2
                        case 32:
354
2
                            return MVM_reg_int32;
355
22.0k
                        case 64:
356
22.0k
                            return MVM_reg_int64;
357
0
                        default:
358
0
                            cleanup_all(vm, ws);
359
0
                            DIE(vm, "Invalid int size for local/lexical");
360
22.1k
                    }
361
22.1k
                }
362
0
                break;
363
547
            case MVM_STORAGE_SPEC_BP_NUM:
364
547
                switch (ss->bits) {
365
0
                    case 32:
366
0
                        return MVM_reg_num32;
367
547
                    case 64:
368
547
                        return MVM_reg_num64;
369
0
                    default:
370
0
                        cleanup_all(vm, ws);
371
0
                        DIE(vm, "Invalid num size for local/lexical");
372
547
                }
373
0
                break;
374
3.52k
            case MVM_STORAGE_SPEC_BP_STR:
375
3.52k
                return MVM_reg_str;
376
0
            default:
377
0
                cleanup_all(vm, ws);
378
0
                DIE(vm, "Type used for local/lexical has invalid boxed primitive in storage spec");
379
26.1k
        }
380
26.1k
    }
381
32.4k
    else {
382
32.4k
        return MVM_reg_obj;
383
32.4k
    }
384
58.6k
}
385
386
/* Grows label storage. */
387
20.5k
static void add_label(VM, FrameState *fs, MAST_Label *l, MVMint32 offset) {
388
20.5k
    if (fs->num_labels == fs->alloc_labels) {
389
3.28k
        if (fs->alloc_labels)
390
1.65k
            fs->alloc_labels *= 2;
391
3.28k
        else
392
1.62k
            fs->alloc_labels = 8;
393
3.28k
        fs->labels = MVM_realloc(fs->labels, fs->alloc_labels * sizeof(LabelInfo));
394
3.28k
    }
395
20.5k
    fs->labels[fs->num_labels].label         = l;
396
20.5k
    fs->labels[fs->num_labels].offset        = offset;
397
20.5k
    fs->labels[fs->num_labels].resolve       = NULL;
398
20.5k
    fs->labels[fs->num_labels].num_resolve   = 0;
399
20.5k
    fs->labels[fs->num_labels].alloc_resolve = 0;
400
20.5k
    fs->num_labels++;
401
20.5k
}
402
403
/* Takes a label and either writes its offset if we already saw it, or writes
404
 * a zero and records that a fixups is needed. */
405
34.8k
static void write_label_or_add_fixup(VM, WriterState *ws, MAST_Label *l) {
406
34.8k
    FrameState *fs   = ws->cur_frame;
407
34.8k
    LabelInfo  *info = NULL;
408
34.8k
    MVMuint32   i;
409
34.8k
410
34.8k
    /* Ensure we've space to write an offset. */
411
34.8k
    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
412
34.8k
413
34.8k
    /* Look for the label. */
414
297k
    for (i = 0; i < fs->num_labels; i++) {
415
280k
        if (fs->labels[i].label == l) {
416
17.9k
            /* Found it. If we know its offset, write and we're done. */
417
17.9k
            MVMint32 offset = fs->labels[i].offset;
418
17.9k
            if (offset >= 0) {
419
6.54k
                write_int32(ws->bytecode_seg, ws->bytecode_pos, offset);
420
6.54k
                ws->bytecode_pos += 4;
421
6.54k
                return;
422
6.54k
            }
423
17.9k
424
17.9k
            /* Otherwise, note this label to add the resolve need to. */
425
11.3k
            info = &(fs->labels[i]);
426
11.3k
            break;
427
17.9k
        }
428
280k
    }
429
34.8k
430
34.8k
    /* If we don't have an entry for this label yet, add it. */
431
28.3k
    if (!info) {
432
16.9k
        add_label(vm, fs, l, -1);
433
16.9k
        info = &(fs->labels[fs->num_labels - 1]);
434
16.9k
    }
435
28.3k
    if (info->num_resolve == info->alloc_resolve) {
436
17.0k
        if (info->alloc_resolve)
437
107
            info->alloc_resolve *= 2;
438
17.0k
        else
439
16.9k
            info->alloc_resolve = 8;
440
17.0k
        info->resolve = MVM_realloc(info->resolve, info->alloc_resolve * sizeof(MVMuint32));
441
17.0k
    }
442
28.3k
    info->resolve[info->num_resolve] = ws->bytecode_pos;
443
28.3k
    info->num_resolve++;
444
28.3k
    fs->unresolved_labels++;
445
28.3k
446
28.3k
    /* Write zero, to be fixed up later. */
447
28.3k
    write_int32(ws->bytecode_seg, ws->bytecode_pos, 0);
448
28.3k
    ws->bytecode_pos += 4;
449
28.3k
}
450
451
/* Takes a label, and either adds it to the labels collection or, if it's been
452
 * seen already, resolves its fixups. */
453
20.5k
static void add_label_and_resolve_fixups(VM, WriterState *ws, MAST_Label *l) {
454
20.5k
    FrameState *fs     = ws->cur_frame;
455
20.5k
    MVMuint32   offset = ws->bytecode_pos - ws->cur_frame->bytecode_start;
456
20.5k
    MVMuint32   i, j;
457
20.5k
458
20.5k
    /* See if it has an existing entry. */
459
194k
    for (i = 0; i < fs->num_labels; i++) {
460
191k
        if (fs->labels[i].label == l) {
461
16.9k
            /* Found it. Must not already have an offset, or it's a dupe. */
462
16.9k
            if (fs->labels[i].offset < 0) {
463
16.9k
                /* Fix up existing usages. */
464
16.9k
                MVMuint32 *resolve = fs->labels[i].resolve;
465
16.9k
                MVMuint32  nr      = fs->labels[i].num_resolve;
466
45.3k
                for (j = 0; j < nr; j++)
467
28.3k
                    write_int32(ws->bytecode_seg, resolve[j], offset);
468
16.9k
                fs->labels[i].offset        = offset;
469
16.9k
                fs->labels[i].alloc_resolve = 0;
470
16.9k
                fs->labels[i].num_resolve   = 0;
471
16.9k
                fs->unresolved_labels      -= nr;
472
16.9k
                MVM_free(fs->labels[i].resolve);
473
16.9k
            }
474
0
            else {
475
0
                cleanup_all(vm, ws);
476
0
                DIE(vm, "Duplicate label");
477
0
            }
478
16.9k
            return;
479
16.9k
        }
480
191k
    }
481
20.5k
482
20.5k
    /* If we get here, no entry; create one. */
483
3.58k
    add_label(vm, fs, l, offset);
484
3.58k
}
485
486
/* Rreturns a label's offset, dying if it's not possible. */
487
static MVMuint32 demand_label_offset(VM, WriterState *ws, MAST_Label *l,
488
300
                                     const char *error) {
489
300
    FrameState *fs = ws->cur_frame;
490
300
    MVMuint32   nl = fs->num_labels;
491
300
    MVMuint32   i;
492
6.51k
    for (i = 0; i < nl; i++) {
493
6.51k
        if (fs->labels[i].label == l) {
494
300
            if (fs->labels[i].offset >= 0)
495
300
                return fs->labels[i].offset;
496
0
            break;
497
300
        }
498
6.51k
    }
499
0
    cleanup_all(vm, ws);
500
0
    DIE(vm, "%s", error);
501
0
}
502
503
/* Compiles the operand to an instruction; this involves checking
504
 * that we have a node of the correct type for it and writing out
505
 * the appropriate thing to the bytecode stream. */
506
539k
static void compile_operand(VM, WriterState *ws, unsigned char op_flags, MASTNode *operand) {
507
539k
    unsigned char op_rw   = op_flags & MVM_operand_rw_mask;
508
539k
    unsigned char op_type = op_flags & MVM_operand_type_mask;
509
539k
    unsigned short int local_type;
510
539k
    if (op_rw == MVM_operand_literal) {
511
117k
        /* Literal; go by type. */
512
117k
        switch (op_type) {
513
1.26k
            case MVM_operand_int64: {
514
1.26k
                if (ISTYPE(vm, operand, ws->types->IVal)) {
515
1.26k
                    MAST_IVal *iv = GET_IVal(operand);
516
1.26k
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 8);
517
1.26k
                    write_int64(ws->bytecode_seg, ws->bytecode_pos, iv->value);
518
1.26k
                    ws->bytecode_pos += 8;
519
1.26k
                }
520
0
                else {
521
0
                    cleanup_all(vm, ws);
522
0
                    DIE(vm, "Expected MAST::IVal, but didn't get one");
523
0
                }
524
1.26k
                break;
525
1.26k
            }
526
36.9k
            case MVM_operand_int16: {
527
36.9k
                if (ISTYPE(vm, operand, ws->types->IVal)) {
528
36.9k
                    MAST_IVal *iv = GET_IVal(operand);
529
36.9k
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
530
36.9k
                    if (iv->value > 32767 || iv->value < -32768) {
531
0
                        cleanup_all(vm, ws);
532
0
                        DIE(vm, "Value outside range of 16-bit MAST::IVal");
533
0
                    }
534
36.9k
                    write_int16(ws->bytecode_seg, ws->bytecode_pos, (short)iv->value);
535
36.9k
                    ws->bytecode_pos += 2;
536
36.9k
                }
537
0
                else {
538
0
                    cleanup_all(vm, ws);
539
0
                    DIE(vm, "Expected MAST::IVal, but didn't get one");
540
0
                }
541
36.9k
                break;
542
1.26k
            }
543
398
            case MVM_operand_num64: {
544
398
                if (ISTYPE(vm, operand, ws->types->NVal)) {
545
398
                    MAST_NVal *nv = GET_NVal(operand);
546
398
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 8);
547
398
                    write_double(ws->bytecode_seg, ws->bytecode_pos, nv->value);
548
398
                    ws->bytecode_pos += 8;
549
398
                }
550
0
                else {
551
0
                    cleanup_all(vm, ws);
552
0
                    DIE(vm, "Expected MAST::NVal, but didn't get one");
553
0
                }
554
398
                break;
555
1.26k
            }
556
39.1k
            case MVM_operand_str: {
557
39.1k
                if (ISTYPE(vm, operand, ws->types->SVal)) {
558
39.1k
                    MAST_SVal *sv = GET_SVal(operand);
559
39.1k
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
560
39.1k
                    write_int32(ws->bytecode_seg, ws->bytecode_pos,
561
39.1k
                        get_string_heap_index(vm, ws, sv->value));
562
39.1k
                    ws->bytecode_pos += 4;
563
39.1k
                }
564
0
                else {
565
0
                    cleanup_all(vm, ws);
566
0
                    DIE(vm, "Expected MAST::SVal, but didn't get one");
567
0
                }
568
39.1k
                break;
569
1.26k
            }
570
34.8k
            case MVM_operand_ins: {
571
34.8k
                if (ISTYPE(vm, operand, ws->types->Label)) {
572
34.8k
                    write_label_or_add_fixup(vm, ws, GET_Label(operand));
573
34.8k
                }
574
0
                else {
575
0
                    cleanup_all(vm, ws);
576
0
                    DIE(vm, "Expected MAST::Label, but didn't get one");
577
0
                }
578
34.8k
                break;
579
1.26k
            }
580
4.85k
            case MVM_operand_coderef: {
581
4.85k
                if (ISTYPE(vm, operand, ws->types->Frame)) {
582
4.85k
                    /* Find the frame index in the compilation unit. */
583
4.85k
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
584
4.85k
                    write_int16(ws->bytecode_seg, ws->bytecode_pos,
585
4.85k
                        get_frame_index(vm, ws, operand));
586
4.85k
                    ws->bytecode_pos += 2;
587
4.85k
                }
588
0
                else {
589
0
                    cleanup_all(vm, ws);
590
0
                    DIE(vm, "Expected MAST::Frame, but didn't get one");
591
0
                }
592
4.85k
                break;
593
1.26k
            }
594
0
            default:
595
0
                cleanup_all(vm, ws);
596
0
                DIE(vm, "Unhandled literal type in MAST compiler");
597
117k
        }
598
117k
    }
599
421k
    else if (op_rw == MVM_operand_read_reg || op_rw == MVM_operand_write_reg) {
600
418k
        /* The operand node had best be a MAST::Local. */
601
418k
        if (ISTYPE(vm, operand, ws->types->Local)) {
602
418k
            MAST_Local *l = GET_Local(operand);
603
418k
604
418k
            /* Ensure it's within the set of known locals. */
605
418k
            if (l->index >= ws->cur_frame->num_locals) {
606
0
                cleanup_all(vm, ws);
607
0
                DIE(vm, "MAST::Local index out of range");
608
0
            }
609
418k
610
418k
            /* Check the type matches. */
611
418k
            local_type = ws->cur_frame->local_types[l->index];
612
418k
            if (op_type != local_type << 3 && op_type != MVM_operand_type_var) {
613
0
                unsigned int  current_frame_idx = ws->current_frame_idx;
614
0
                unsigned int  current_ins_idx = ws->current_ins_idx;
615
0
                const char *name = ws->current_op_info->name;
616
0
                unsigned int  current_operand_idx = ws->current_operand_idx;
617
0
                cleanup_all(vm, ws);
618
0
                DIE(vm, "At Frame %u, Instruction %u, op '%s', operand %u, "
619
0
                    "MAST::Local of wrong type (%u) specified; expected %u",
620
0
                    current_frame_idx, current_ins_idx,
621
0
                    name, current_operand_idx,
622
0
                    local_type, (op_type >> 3));
623
0
            }
624
418k
625
418k
            /* Write the operand type. */
626
418k
            if (l->index < 0 || l->index > 32768)
627
0
                DIE(vm, "Frame %u local access out of range", ws->current_frame_idx);
628
418k
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
629
418k
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->index);
630
418k
            ws->bytecode_pos += 2;
631
418k
        }
632
0
        else {
633
0
            unsigned int  current_frame_idx = ws->current_frame_idx;
634
0
            unsigned int  current_ins_idx = ws->current_ins_idx;
635
0
            const char *name = ws->current_op_info->name;
636
0
            unsigned int  current_operand_idx = ws->current_operand_idx;
637
0
            cleanup_all(vm, ws);
638
0
            DIE(vm, "At Frame %u, Instruction %u, op '%s', operand %u, expected MAST::Local, but didn't get one",
639
0
                current_frame_idx, current_ins_idx, name, current_operand_idx);
640
0
        }
641
418k
    }
642
3.25k
    else if (op_rw == MVM_operand_read_lex || op_rw == MVM_operand_write_lex) {
643
3.25k
        /* The operand node should be a MAST::Lexical. */
644
3.25k
        if (ISTYPE(vm, operand, ws->types->Lexical)) {
645
3.25k
            MAST_Lexical *l = GET_Lexical(operand);
646
3.25k
647
3.25k
            /* Write the index, then the frame count. */
648
3.25k
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
649
3.25k
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->index);
650
3.25k
            ws->bytecode_pos += 2;
651
3.25k
            write_int16(ws->bytecode_seg, ws->bytecode_pos, (unsigned short)l->frames_out);
652
3.25k
            ws->bytecode_pos += 2;
653
3.25k
        }
654
0
        else {
655
0
            cleanup_all(vm, ws);
656
0
            DIE(vm, "Expected MAST::Lexical, but didn't get one");
657
0
        }
658
3.25k
    }
659
0
    else {
660
0
        cleanup_all(vm, ws);
661
0
        DIE(vm, "Unknown operand type cannot be compiled");
662
0
    }
663
539k
    ws->current_operand_idx++;
664
539k
}
665
666
/* Takes a set of flags describing a callsite. Writes out a callsite
667
 * descriptor and returns the index of it. */
668
13.0k
static unsigned short get_callsite_id(VM, WriterState *ws, MASTNode *flag_node, MASTNode *args) {
669
13.0k
    unsigned int        num_nameds = 0;
670
13.0k
    unsigned short      i, identifier_len;
671
13.0k
    unsigned char      *flags, *identifier;
672
13.0k
    unsigned int       *named_idxs;
673
13.0k
    CallsiteReuseEntry *entry = NULL;
674
13.0k
675
13.0k
    /* Get callsite elements and work out if a padding byte will be needed. */
676
13.0k
    unsigned short elems = (unsigned short)ELEMS(vm, flag_node);
677
13.0k
    unsigned short align = elems % 2;
678
13.0k
679
13.0k
    /* See if the callsite has any named args, and get string pool entries
680
13.0k
     * for them if so. */
681
13.0k
    flags      = (unsigned char *)MVM_malloc(elems);
682
13.0k
    named_idxs = (unsigned int *)MVM_malloc(elems * sizeof(int));
683
36.6k
    for (i = 0; i < elems; i++) {
684
23.5k
        flags[i] = (unsigned char)ATPOS_I_C(vm, flag_node, i);
685
23.5k
        if (flags[i] & (MVM_CALLSITE_ARG_NAMED)) {
686
2.85k
            MASTNode *argname = ATPOS(vm, args, i + num_nameds);
687
2.85k
            if (ISTYPE(vm, argname, ws->types->SVal)) {
688
2.85k
                named_idxs[num_nameds] = get_string_heap_index(vm, ws,
689
2.85k
                    ((MAST_SVal *)argname)->value);
690
2.85k
                num_nameds++;
691
2.85k
            }
692
0
            else {
693
0
                DIE(vm, "Malformed callsite args: missing MAST::SVal for argument name");
694
0
            }
695
2.85k
        }
696
23.5k
    }
697
13.0k
698
13.0k
    /* See if we already know this callsite. */
699
13.0k
    identifier_len = elems + num_nameds * sizeof(int);
700
13.0k
    identifier     = MVM_malloc(identifier_len);
701
13.0k
    memcpy(identifier, flags, elems);
702
13.0k
    memcpy(identifier + elems, named_idxs, identifier_len - elems);
703
13.0k
    HASH_FIND(hash_handle, ws->callsite_reuse_head, identifier, identifier_len, entry);
704
13.0k
    if (entry) {
705
9.68k
        MVM_free(flags);
706
9.68k
        MVM_free(named_idxs);
707
9.68k
        MVM_free(identifier);
708
9.68k
        return entry->callsite_id;
709
9.68k
    }
710
3.39k
    entry = (CallsiteReuseEntry *)MVM_malloc(sizeof(CallsiteReuseEntry));
711
3.39k
    entry->callsite_id = (unsigned short)ws->num_callsites;
712
3.39k
    entry->identifier = identifier;
713
3.39k
    HASH_ADD_KEYPTR(hash_handle, ws->callsite_reuse_head, identifier, identifier_len, entry);
714
3.39k
715
3.39k
    /* Emit callsite; be sure to pad if there's uneven number of flags. */
716
3.39k
    ensure_space(vm, &ws->callsite_seg, &ws->callsite_alloc, ws->callsite_pos,
717
3.39k
        2 + elems + align);
718
3.39k
    write_int16(ws->callsite_seg, ws->callsite_pos, elems);
719
3.39k
    ws->callsite_pos += 2;
720
10.1k
    for (i = 0; i < elems; i++)
721
6.75k
        write_int8(ws->callsite_seg, ws->callsite_pos++, flags[i]);
722
3.39k
    if (align)
723
2.67k
        write_int8(ws->callsite_seg, ws->callsite_pos++, 0);
724
3.39k
725
3.39k
    /* Emit any nameds. */
726
3.39k
    if (num_nameds) {
727
1.09k
        ensure_space(vm, &ws->callsite_seg, &ws->callsite_alloc, ws->callsite_pos,
728
1.09k
            4 * num_nameds);
729
2.30k
        for (i = 0; i < num_nameds; i++) {
730
1.21k
            write_int32(ws->callsite_seg, ws->callsite_pos, named_idxs[i]);
731
1.21k
            ws->callsite_pos += 4;
732
1.21k
        }
733
1.09k
    }
734
3.39k
735
3.39k
    MVM_free(flags);
736
3.39k
    MVM_free(named_idxs);
737
3.39k
738
3.39k
    return (unsigned short)ws->num_callsites++;
739
3.39k
}
740
741
20.4k
/* Value for compile_instruction's override_second_argument: the const_i64
 * being compiled is emitted as const_i64_32 with a 32-bit immediate. */
#define OVERRIDE_WITH_32 1
742
40.7k
/* Value for compile_instruction's override_second_argument: the const_i64
 * being compiled is emitted as const_i64_16 with a 16-bit immediate. */
#define OVERRIDE_WITH_16 2
743
744
/* Compiles an instruction (which may actually be any of the
745
 * nodes valid directly in a Frame's instruction list, which
746
 * means labels are valid too). */
747
269k
static void compile_instruction(VM, WriterState *ws, MASTNode *node) {
    /* Dispatches on the MAST node type:
     *   Op           - writes the opcode and its operands to the bytecode segment
     *   ExtOp        - same, with operand kinds taken from ws->cu->extop_sigs
     *   Label        - records the label and resolves pending fixups
     *   Call         - emits prepargs, one arg_* op per flag, then invoke_*
     *   Annotated    - writes an annotation entry, then compiles nested nodes
     *   HandlerScope - compiles nested nodes and records a FrameHandler
     * Any other node type is an error. Every error path releases the writer
     * state with cleanup_all() before DIE; values needed for the message are
     * copied into locals first, since ws is not touched after cleanup. */
    if (ISTYPE(vm, node, ws->types->Op)) {
        MAST_Op   *o = GET_Op(node);
        const MVMOpInfo *info;
        int        i;
        unsigned char override_second_argument = 0;

        /* Look up opcode and get argument info. */
        unsigned short op   = o->op;
        info = MVM_op_get_op(op);
        if (!info) {
            cleanup_all(vm, ws);
            DIE(vm, "Invalid op specified in instruction %d", op);
        }
        ws->current_op_info = info;
        ws->current_operand_idx = 0;

        /* Ensure argument count matches up. */
        if (info->num_operands != 0 && ELEMS(vm, o->operands) != info->num_operands) {
            unsigned int  current_frame_idx = ws->current_frame_idx;
            unsigned int  current_ins_idx = ws->current_ins_idx;
            const char *name = ws->current_op_info->name;
            cleanup_all(vm, ws);
            DIE(vm, "At Frame %u, Instruction %u, op '%s' has invalid number (%u) of operands; needs %u.",
                current_frame_idx, current_ins_idx, name,
                ELEMS(vm, o->operands), info->num_operands);
        }

        /* If we're outputting a const_i64 instruction, we may want to
         * turn it into a const_i64_32 or const_i64_16 instead if the
         * value fits in the narrower immediate. */
        if (op == MVM_OP_const_i64) {
            MASTNode *operand = ATPOS(vm, o->operands, 1);
            MAST_IVal *iv = GET_IVal(operand);
            if (INT16_MIN <= iv->value && iv->value <= INT16_MAX) {
                override_second_argument = OVERRIDE_WITH_16;
            } else if (INT32_MIN <= iv->value && iv->value <= INT32_MAX) {
                override_second_argument = OVERRIDE_WITH_32;
            }
        }

        /* Write opcode (possibly replaced by the narrower const variant). */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
        if (override_second_argument == 0)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, op);
        else if (override_second_argument == OVERRIDE_WITH_16)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_const_i64_16);
        else if (override_second_argument == OVERRIDE_WITH_32)
            write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_const_i64_32);
        ws->bytecode_pos += 2;

        /* Write operands. */
        for (i = 0; i < info->num_operands; i++) {
            if (i == 1 && override_second_argument) {
                /* Narrowed const_i64: write the immediate directly at the
                 * chosen width instead of going through compile_operand. */
                MASTNode *operand = ATPOS(vm, o->operands, 1);
                MAST_IVal *iv = GET_IVal(operand);
                if (override_second_argument == OVERRIDE_WITH_32) {
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
                    write_int32(ws->bytecode_seg, ws->bytecode_pos, (MVMint32)iv->value);
                    ws->bytecode_pos += 4;
                } else {
                    ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
                    write_int16(ws->bytecode_seg, ws->bytecode_pos, (MVMint16)iv->value);
                    ws->bytecode_pos += 2;
                }
            } else {
                compile_operand(vm, ws, info->operands[i], ATPOS(vm, o->operands, i));
            }
        }
    }
    else if (ISTYPE(vm, node, ws->types->ExtOp)) {
        MAST_ExtOp *o = GET_ExtOp(node);
        MASTNode   *operands;
        int         i, num_operands;

        /* Look up opcode and get argument info. */
        unsigned short op = o->op;
        if (op < EXTOP_BASE || (op - EXTOP_BASE) >= ELEMS(vm, ws->cu->extop_sigs)) {
            cleanup_all(vm, ws);
            DIE(vm, "Invalid extension op %d specified", op);
        }
        operands = ATPOS(vm, ws->cu->extop_sigs, op - EXTOP_BASE);
        if (VM_OBJ_IS_NULL(operands)) {
            cleanup_all(vm, ws);
            DIE(vm, "Missing extension op operand array for instruction %d", op);
        }
        ws->current_op_info = NULL;
        ws->current_operand_idx = 0;

        /* Ensure argument count matches up. */
        num_operands = ELEMS(vm, operands);
        if (ELEMS(vm, o->operands) != num_operands) {
            unsigned int  current_frame_idx = ws->current_frame_idx;
            unsigned int  current_ins_idx = ws->current_ins_idx;
            char *c_name = VM_STRING_TO_C_STRING(vm, o->name);
            char *waste[] = { c_name, NULL };
            cleanup_all(vm, ws);
            DIE_FREE(vm, waste, "At Frame %u, Instruction %u, op '%s' has invalid number (%u) of operands; needs %u.",
                current_frame_idx, current_ins_idx,
                c_name,
                ELEMS(vm, o->operands), num_operands);
        }

        /* Write opcode. */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, op);
        ws->bytecode_pos += 2;

        /* Write operands. */
        for (i = 0; i < num_operands; i++)
            compile_operand(vm, ws, ATPOS_I(vm, operands, i), ATPOS(vm, o->operands, i));
    }
    else if (ISTYPE(vm, node, ws->types->Label)) {
        add_label_and_resolve_fixups(vm, ws, GET_Label(node));
    }
    else if (ISTYPE(vm, node, ws->types->Call)) {
        MAST_Call *c           = GET_Call(node);
        /* Opcodes are written as 16-bit values, so hold the chosen invoke
         * op in a 16-bit variable rather than truncating it to a byte. */
        unsigned short call_op = MVM_OP_invoke_v;
        unsigned char res_type = 0;
        unsigned short num_flags, flag_pos, arg_pos;

        /* Emit callsite (may re-use existing one) and emit loading of it. */
        unsigned short callsite_id = get_callsite_id(vm, ws, c->flags, c->args);
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 4);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_prepargs);
        ws->bytecode_pos += 2;
        write_int16(ws->bytecode_seg, ws->bytecode_pos, callsite_id);
        ws->bytecode_pos += 2;

        /* for errors */
        ws->current_op_info = MVM_op_get_op(MVM_OP_prepargs);
        ws->current_operand_idx = 0;

        /* Set up args. arg_pos advances once per flag, plus once more for
         * each named flag (whose name occupies its own args slot). */
        num_flags = (unsigned short)ELEMS(vm, c->flags);
        arg_pos = 0;
        for (flag_pos = 0; flag_pos < num_flags; flag_pos++) {
            /* Handle any special flags. */
            unsigned char flag = (unsigned char)ATPOS_I_C(vm, c->flags, flag_pos);
            if (flag & MVM_CALLSITE_ARG_NAMED) {
                ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 6);
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_argconst_s);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_str, ATPOS(vm, c->args, arg_pos));
                arg_pos++;
            }
            else if (flag & MVM_CALLSITE_ARG_FLAT) {
                /* don't need to do anything special */
            }

            /* Now go by flag type. */
            ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 6);
            if (flag & MVM_CALLSITE_ARG_OBJ) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_o);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_obj,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_STR) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_s);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_str,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_INT) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_i);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_int64,
                    ATPOS(vm, c->args, arg_pos));
            }
            else if (flag & MVM_CALLSITE_ARG_NUM) {
                write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_arg_n);
                ws->bytecode_pos += 2;
                write_int16(ws->bytecode_seg, ws->bytecode_pos, arg_pos);
                ws->bytecode_pos += 2;
                compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_num64,
                    ATPOS(vm, c->args, arg_pos));
            }
            else {
                unsigned int  current_frame_idx = ws->current_frame_idx;
                unsigned int  current_ins_idx = ws->current_ins_idx;
                const char *name = ws->current_op_info->name;
                cleanup_all(vm, ws);
                /* NOTE(review): a richer message that also reported the
                 * file and line of ws->last_annotated used to be sketched
                 * here; reviving it would require capturing those values
                 * before cleanup_all() above. */
                DIE(vm, "At Frame %u, Instruction %u, op '%s', unhandled arg type %u.",
                    current_frame_idx, current_ins_idx, name, flag);
            }

            arg_pos++;
        }

        /* Select operation based on return type. With no Local as the
         * result, call_op stays invoke_v and no result operand is written. */
        if (ISTYPE(vm, c->result, ws->types->Local)) {
            MAST_Local *l = GET_Local(c->result);

            /* Ensure it's within the set of known locals. */
            if (l->index >= ws->cur_frame->num_locals) {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local index out of range");
            }

            /* Go by type. */
            switch (ws->cur_frame->local_types[l->index]) {
                case MVM_reg_int64:
                    call_op = MVM_OP_invoke_i;
                    res_type = MVM_operand_int64;
                    break;
                case MVM_reg_num64:
                    call_op = MVM_OP_invoke_n;
                    res_type = MVM_operand_num64;
                    break;
                case MVM_reg_str:
                    call_op = MVM_OP_invoke_s;
                    res_type = MVM_operand_str;
                    break;
                case MVM_reg_obj:
                    call_op = MVM_OP_invoke_o;
                    res_type = MVM_operand_obj;
                    break;
                default:
                    cleanup_all(vm, ws);
                    DIE(vm, "Invalid MAST::Local type for return value");
            }
        }

        /* Emit the invocation op. */
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 6);
        write_int16(ws->bytecode_seg, ws->bytecode_pos, call_op);
        ws->bytecode_pos += 2;
        if (call_op != MVM_OP_invoke_v)
            compile_operand(vm, ws, MVM_operand_read_reg | res_type, c->result);
        compile_operand(vm, ws, MVM_operand_read_reg | MVM_operand_obj, c->target);
    }
    else if (ISTYPE(vm, node, ws->types->Annotated)) {
        MAST_Annotated *a = GET_Annotated(node);
        unsigned int i;
        unsigned int num_ins = ELEMS(vm, a->instructions);
        unsigned int offset = ws->bytecode_pos - ws->cur_frame->bytecode_start;

        /* Write a 12-byte annotation entry: bytecode offset relative to the
         * frame start, string heap index of the file, and the line. */
        ws->last_annotated = a;
        ensure_space(vm, &ws->annotation_seg, &ws->annotation_alloc, ws->annotation_pos, 12);
        write_int32(ws->annotation_seg, ws->annotation_pos, offset);
        write_int32(ws->annotation_seg, ws->annotation_pos + 4, get_string_heap_index(vm, ws, a->file));
        write_int32(ws->annotation_seg, ws->annotation_pos + 8, (unsigned int)a->line);
        ws->annotation_pos += 12;
        ws->cur_frame->num_annotations++;

        /* Compile the annotated instructions themselves. */
        for (i = 0; i < num_ins; i++)
            compile_instruction(vm, ws, ATPOS(vm, a->instructions, i));
    }
    else if (ISTYPE(vm, node, ws->types->HandlerScope)) {
        MAST_HandlerScope *hs = GET_HandlerScope(node);
        unsigned int i;
        unsigned int num_ins = ELEMS(vm, hs->instructions);
        unsigned int start   = ws->bytecode_pos - ws->cur_frame->bytecode_start;
        unsigned int end;

        /* Compile the protected region first so its end offset is known. */
        for (i = 0; i < num_ins; i++)
            compile_instruction(vm, ws, ATPOS(vm, hs->instructions, i));
        end = ws->bytecode_pos - ws->cur_frame->bytecode_start;

        /* Grow the frame's handler table by one entry. */
        ws->cur_frame->num_handlers++;
        if (ws->cur_frame->handlers)
            ws->cur_frame->handlers = (FrameHandler *)MVM_realloc(ws->cur_frame->handlers,
                ws->cur_frame->num_handlers * sizeof(FrameHandler));
        else
            ws->cur_frame->handlers = (FrameHandler *)MVM_malloc(
                ws->cur_frame->num_handlers * sizeof(FrameHandler));

        /* From here on, i is the index of the new handler entry. */
        i = ws->cur_frame->num_handlers - 1;
        ws->cur_frame->handlers[i].start_offset = start;
        ws->cur_frame->handlers[i].end_offset = end;
        ws->cur_frame->handlers[i].category_mask = (unsigned int)hs->category_mask;
        ws->cur_frame->handlers[i].action = (unsigned short)hs->action;
        if (ws->cur_frame->handlers[i].category_mask & MVM_EX_CAT_LABELED) {
            if (ISTYPE(vm, hs->label_local, ws->types->Local)) {
                MAST_Local *l = GET_Local(hs->label_local);

                /* Ensure it's within the set of known locals and an object. */
                if (l->index >= ws->cur_frame->num_locals) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local index out of range in HandlerScope");
                }
                if (ws->cur_frame->local_types[l->index] != MVM_reg_obj) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local for HandlerScope must be an object");
                }

                /* Stash local index. */
                ws->cur_frame->handlers[i].label_reg = (unsigned short)l->index;
            }
            else {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local required for HandlerScope with loop label");
            }
        }

        /* Ensure we have a label. */
        if (ISTYPE(vm, hs->goto_label, ws->types->Label)) {
            ws->cur_frame->handlers[i].label = hs->goto_label;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "MAST::Label required for HandlerScope goto");
        }

        /* May need a block also. */
        if (hs->action == HANDLER_INVOKE) {
            if (ISTYPE(vm, hs->block_local, ws->types->Local)) {
                MAST_Local *l = GET_Local(hs->block_local);

                /* Ensure it's within the set of known locals and an object. */
                if (l->index >= ws->cur_frame->num_locals) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local index out of range in HandlerScope");
                }
                if (ws->cur_frame->local_types[l->index] != MVM_reg_obj) {
                    cleanup_all(vm, ws);
                    DIE(vm, "MAST::Local for HandlerScope must be an object");
                }

                /* Stash local index. */
                ws->cur_frame->handlers[i].local = (unsigned short)l->index;
            }
            else {
                cleanup_all(vm, ws);
                DIE(vm, "MAST::Local required for HandlerScope invoke action");
            }
        }
        else if (hs->action == HANDLER_UNWIND_GOTO || hs->action == HANDLER_UNWIND_GOTO_OBJ) {
            ws->cur_frame->handlers[i].local = 0;
        }
        else {
            cleanup_all(vm, ws);
            DIE(vm, "Invalid action code for handler scope");
        }
    }
    else {
        cleanup_all(vm, ws);
        DIE(vm, "Invalid MAST node in instruction list (must be Op, ExtOp, Call, Label, Annotated, or HandlerScope)");
    }

    /* Counts every node compiled, including those nested inside Annotated
     * and HandlerScope nodes (they increment it via the recursive calls). */
    ws->current_ins_idx++;
}
1098
1099
/* Compiles a frame. */
1100
4.04k
static void compile_frame(VM, WriterState *ws, MASTNode *node, unsigned short idx) {
1101
4.04k
    MAST_Frame  *f;
1102
4.04k
    FrameState  *fs;
1103
4.04k
    unsigned int i, num_ins, instructions_start;
1104
4.04k
    MASTNode *last_inst = NULL;
1105
4.04k
    MVMuint16 num_slvs;
1106
4.04k
1107
4.04k
    /* Ensure we have a node of the right type. */
1108
4.04k
    if (!ISTYPE(vm, node, ws->types->Frame)) {
1109
0
        cleanup_all(vm, ws);
1110
0
        DIE(vm, "Child of CompUnit must be a Frame");
1111
0
    }
1112
4.04k
    f = GET_Frame(node);
1113
4.04k
1114
4.04k
    /* Allocate frame state. */
1115
4.04k
    fs = ws->cur_frame    = (FrameState *)MVM_malloc(sizeof(FrameState));
1116
4.04k
    fs->bytecode_start    = ws->bytecode_pos;
1117
4.04k
    fs->frame_start       = ws->frame_pos;
1118
4.04k
    fs->labels            = NULL;
1119
4.04k
    fs->num_labels        = 0;
1120
4.04k
    fs->alloc_labels      = 0;
1121
4.04k
    fs->unresolved_labels = 0;
1122
4.04k
1123
4.04k
    /* Count locals and lexicals. */
1124
4.04k
    fs->num_locals   = ELEMS(vm, f->local_types);
1125
4.04k
    fs->num_lexicals = ELEMS(vm, f->lexical_types);
1126
4.04k
1127
4.04k
    if (fs->num_locals > (1 << 16)) {
1128
0
        cleanup_all(vm, ws);
1129
0
        DIE(vm, "Too many locals in this frame.");
1130
0
    }
1131
4.04k
1132
4.04k
    if (ELEMS(vm, f->lexical_names) != fs->num_lexicals) {
1133
0
        cleanup_all(vm, ws);
1134
0
        DIE(vm, "Lexical types list and lexical names list have unequal length");
1135
0
    }
1136
4.04k
1137
4.04k
    /* initialize number of annotation */
1138
4.04k
    fs->num_annotations = 0;
1139
4.04k
1140
4.04k
    /* initialize number of handlers and handlers pointer */
1141
4.04k
    fs->num_handlers = 0;
1142
4.04k
    fs->handlers = NULL;
1143
4.04k
1144
4.04k
    /* Ensure space is available to write frame entry, and write the
1145
4.04k
     * header, apart from the bytecode length, which we'll fill in
1146
4.04k
     * later. */
1147
4.04k
    ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
1148
4.04k
        FRAME_HEADER_SIZE + fs->num_locals * 2 + fs->num_lexicals * 6);
1149
4.04k
    write_int32(ws->frame_seg, ws->frame_pos, fs->bytecode_start);
1150
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 4, 0); /* Filled in later. */
1151
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 8, fs->num_locals);
1152
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 12, fs->num_lexicals);
1153
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 16,
1154
4.04k
        get_string_heap_index(vm, ws, f->cuuid));
1155
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 20,
1156
4.04k
        get_string_heap_index(vm, ws, f->name));
1157
4.04k
1158
4.04k
    /* Handle outer. The current index means "no outer". */
1159
4.04k
    if (ISTYPE(vm, f->outer, ws->types->Frame)) {
1160
1.53k
        /* First, see if we have the index cached. If not, go hunting. */
1161
1.53k
        if (((MAST_Frame *)f->outer)->flags & FRAME_FLAG_HAS_INDEX) {
1162
1.53k
            write_int16(ws->frame_seg, ws->frame_pos + 24,
1163
1.53k
                ((MAST_Frame *)f->outer)->index);
1164
1.53k
        }
1165
0
        else {
1166
0
            unsigned short j, found, num_frames;
1167
0
            found = 0;
1168
0
            num_frames = (unsigned short)ELEMS(vm, ws->cu->frames);
1169
0
            for (j = 0; j < num_frames; j++) {
1170
0
                if (ATPOS(vm, ws->cu->frames, j) == f->outer) {
1171
0
                    write_int16(ws->frame_seg, ws->frame_pos + 24, j);
1172
0
                    found = 1;
1173
0
                    break;
1174
0
                }
1175
0
            }
1176
0
            if (!found) {
1177
0
                cleanup_all(vm, ws);
1178
0
                DIE(vm, "Could not locate outer frame in frame list");
1179
0
            }
1180
0
        }
1181
1.53k
    }
1182
2.51k
    else {
1183
2.51k
        write_int16(ws->frame_seg, ws->frame_pos + 24, idx);
1184
2.51k
    }
1185
4.04k
1186
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 26, ws->annotation_pos);
1187
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 30, 0); /* number of annotation; fill in later */
1188
4.04k
    write_int32(ws->frame_seg, ws->frame_pos + 34, 0); /* number of handlers; fill in later */
1189
4.04k
    write_int16(ws->frame_seg, ws->frame_pos + 38, (MVMint16)f->flags);
1190
4.04k
    num_slvs = f->flags & FRAME_FLAG_HAS_SLV
1191
351
        ? (MVMuint16)ELEMS(vm, f->static_lex_values) / 4
1192
3.69k
        : 0;
1193
4.04k
    write_int16(ws->frame_seg, ws->frame_pos + 40, num_slvs);
1194
4.04k
1195
4.04k
    if (f->flags & FRAME_FLAG_HAS_CODE_OBJ) {
1196
409
        write_int32(ws->frame_seg, ws->frame_pos + 42, f->code_obj_sc_dep_idx + 1);
1197
409
        write_int32(ws->frame_seg, ws->frame_pos + 46, f->code_obj_sc_idx);
1198
409
    }
1199
3.63k
    else {
1200
3.63k
        write_int32(ws->frame_seg, ws->frame_pos + 42, 0);
1201
3.63k
        write_int32(ws->frame_seg, ws->frame_pos + 46, 0);
1202
3.63k
    }
1203
4.04k
1204
4.04k
    ws->frame_pos += FRAME_HEADER_SIZE;
1205
4.04k
1206
4.04k
    /* Write locals, as well as collecting our own array of type info. */
1207
4.04k
    fs->local_types = (short unsigned int *)MVM_malloc(sizeof(unsigned short) * fs->num_locals);
1208
59.8k
    for (i = 0; i < fs->num_locals; i++) {
1209
55.8k
        unsigned short local_type = type_to_local_type(vm, ws, ATPOS(vm, f->local_types, i));
1210
55.8k
        fs->local_types[i] = local_type;
1211
55.8k
        write_int16(ws->frame_seg, ws->frame_pos, local_type);
1212
55.8k
        ws->frame_pos += 2;
1213
55.8k
    }
1214
4.04k
1215
4.04k
    /* Write lexicals. */
1216
4.04k
    fs->lexical_types = (short unsigned int *)MVM_malloc(sizeof(unsigned short) * fs->num_lexicals);
1217
6.84k
    for (i = 0; i < fs->num_lexicals; i++) {
1218
2.80k
        unsigned short lexical_type = type_to_local_type(vm, ws, ATPOS(vm, f->lexical_types, i));
1219
2.80k
        fs->lexical_types[i] = lexical_type;
1220
2.80k
        write_int16(ws->frame_seg, ws->frame_pos, lexical_type);
1221
2.80k
        ws->frame_pos += 2;
1222
2.80k
        write_int32(ws->frame_seg, ws->frame_pos,
1223
2.80k
            get_string_heap_index(vm, ws, ATPOS_S_C(vm, f->lexical_names, i)));
1224
2.80k
        ws->frame_pos += 4;
1225
2.80k
    }
1226
4.04k
1227
4.04k
    /* Save the location of the start of instructions */
1228
4.04k
    instructions_start = ws->bytecode_pos;
1229
4.04k
1230
4.04k
    /* Compile the instructions. */
1231
4.04k
    ws->current_ins_idx = 0;
1232
4.04k
    num_ins = ELEMS(vm, f->instructions);
1233
204k
    for (i = 0; i < num_ins; i++)
1234
200k
        compile_instruction(vm, ws, last_inst = ATPOS(vm, f->instructions, i));
1235
4.04k
1236
4.04k
    /* Fixup frames that don't have a return instruction, so
1237
4.04k
     * we don't have to check against bytecode length every
1238
4.04k
     * time through the runloop. */
1239
4.04k
    if (!last_inst || !ISTYPE(vm, last_inst, ws->types->Op)
1240
4.04k
            || (   GET_Op(last_inst)->op != MVM_OP_return
1241
3.79k
                && GET_Op(last_inst)->op != MVM_OP_return_i
1242
3.67k
                && GET_Op(last_inst)->op != MVM_OP_return_n
1243
3.64k
                && GET_Op(last_inst)->op != MVM_OP_return_s
1244
3.49k
                && GET_Op(last_inst)->op != MVM_OP_return_o
1245
0
            )) {
1246
0
        ensure_space(vm, &ws->bytecode_seg, &ws->bytecode_alloc, ws->bytecode_pos, 2);
1247
0
        write_int16(ws->bytecode_seg, ws->bytecode_pos, MVM_OP_return);
1248
0
        ws->bytecode_pos += 2;
1249
0
    }
1250
4.04k
1251
4.04k
    /* Fill in bytecode length. */
1252
4.04k
    write_int32(ws->frame_seg, fs->frame_start + 4, ws->bytecode_pos - instructions_start);
1253
4.04k
1254
4.04k
    /* Fill in number of annotations. */
1255
4.04k
    write_int32(ws->frame_seg, fs->frame_start + 30, fs->num_annotations);
1256
4.04k
1257
4.04k
    /* Fill in number of handlers. */
1258
4.04k
    write_int32(ws->frame_seg, fs->frame_start + 34, fs->num_handlers);
1259
4.04k
1260
4.04k
    /* Write handlers. */
1261
4.34k
    for (i = 0; i < fs->num_handlers; i++) {
1262
300
        ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
1263
300
            FRAME_HANDLER_SIZE);
1264
300
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].start_offset);
1265
300
        ws->frame_pos += 4;
1266
300
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].end_offset);
1267
300
        ws->frame_pos += 4;
1268
300
        write_int32(ws->frame_seg, ws->frame_pos, fs->handlers[i].category_mask);
1269
300
        ws->frame_pos += 4;
1270
300
        write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].action);
1271
300
        ws->frame_pos += 2;
1272
300
        write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].local);
1273
300
        ws->frame_pos += 2;
1274
300
        if (ws->cur_frame->handlers[i].label)
1275
300
            write_int32(ws->frame_seg, ws->frame_pos,
1276
300
                demand_label_offset(vm, ws, GET_Label(fs->handlers[i].label),
1277
300
                    "HandlerScope uses unresolved label"));
1278
300
        else
1279
0
            write_int32(ws->frame_seg, ws->frame_pos, 0);
1280
300
        ws->frame_pos += 4;
1281
300
        if (fs->handlers[i].category_mask & MVM_EX_CAT_LABELED) {
1282
23
            ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos, 2);
1283
23
            write_int16(ws->frame_seg, ws->frame_pos, fs->handlers[i].label_reg);
1284
23
            ws->frame_pos += 2;
1285
23
        }
1286
300
    }
1287
4.04k
1288
4.04k
    /* Write static lex values. */
1289
4.04k
    ensure_space(vm, &ws->frame_seg, &ws->frame_alloc, ws->frame_pos,
1290
4.04k
        FRAME_SLV_SIZE * num_slvs);
1291
4.97k
    for (i = 0; i < num_slvs; i++) {
1292
930
        write_int16(ws->frame_seg, ws->frame_pos,
1293
930
            (MVMuint16)ATPOS_I(vm, f->static_lex_values, 4 * i));
1294
930
        write_int16(ws->frame_seg, ws->frame_pos + 2,
1295
930
            (MVMuint16)ATPOS_I(vm, f->static_lex_values, 4 * i + 1));
1296
930
        write_int32(ws->frame_seg, ws->frame_pos + 4,
1297
930
            (MVMuint32)ATPOS_I(vm, f->static_lex_values, 4 * i + 2));
1298
930
        write_int32(ws->frame_seg, ws->frame_pos + 8,
1299
930
            (MVMuint32)ATPOS_I(vm, f->static_lex_values, 4 * i + 3));
1300
930
        ws->frame_pos += FRAME_SLV_SIZE;
1301
930
    }
1302
4.04k
1303
4.04k
    /* Any leftover labels? */
1304
4.04k
    if (fs->unresolved_labels) {
1305
0
        cleanup_all(vm, ws);
1306
0
        DIE(vm, "Frame has %u unresolved labels", fs->unresolved_labels);
1307
0
    }
1308
4.04k
1309
4.04k
    /* Free the frame state. */
1310
4.04k
    cleanup_frame(vm, fs);
1311
4.04k
    ws->cur_frame = NULL;
1312
4.04k
1313
4.04k
    /* Increment frame count. */
1314
4.04k
    ws->num_frames++;
1315
4.04k
}
1316
1317
/* Takes all of the strings and joins them into a heap, encoding them as
1318
 * UTF-8. */
1319
1.18k
static char * form_string_heap(VM, WriterState *ws, unsigned int *string_heap_size) {
1320
1.18k
    char         *heap;
1321
1.18k
    unsigned int  i, num_strings, heap_size, heap_alloc;
1322
1.18k
1323
1.18k
    /* If we've nothing to do, just return immediately. */
1324
1.18k
    num_strings = ELEMS(vm, ws->strings);
1325
1.18k
    if (num_strings == 0) {
1326
0
        *string_heap_size = 0;
1327
0
        return NULL;
1328
0
    }
1329
1.18k
1330
1.18k
    /* Allocate heap starting point (just a guess). */
1331
1.18k
    heap_size = 0;
1332
1.18k
    heap_alloc = num_strings * 32;
1333
1.18k
    heap = (char *)MVM_malloc(heap_alloc);
1334
1.18k
1335
1.18k
    /* Add each string to the heap. */
1336
33.7k
    for (i = 0; i < num_strings; i++) {
1337
32.5k
        MVMuint64 bytelen;
1338
32.5k
        char *encoded;
1339
32.5k
        MVMGraphemeIter gi;
1340
32.5k
        unsigned short align;
1341
32.5k
        unsigned int need;
1342
32.5k
1343
32.5k
        /* Decide if we can get away with Latin-1 with an assumption of the
1344
32.5k
         * string already being in NFG. Latin-1 is except \r, which we also
1345
32.5k
         * check for here. */
1346
32.5k
        MVMint32   need_utf8 = 0;
1347
32.5k
        MVMString *str       = ATPOS_S(vm, ws->strings, i);
1348
32.5k
        MVM_string_gi_init(tc, &gi, str);
1349
400k
        while (MVM_string_gi_has_more(tc, &gi)) {
1350
368k
            MVMGrapheme32 g = MVM_string_gi_get_grapheme(tc, &gi);
1351
368k
            if (g < 0 || g >= 0xFF || g == 0x0D) {
1352
76
                need_utf8 = 1;
1353
76
                break;
1354
76
            }
1355
368k
        }
1356
32.5k
1357
32.5k
        /* Encode it with the chosen algorithm. */
1358
32.5k
        encoded = need_utf8
1359
76
            ? MVM_string_utf8_encode(tc, str, &bytelen, 0)
1360
32.4k
            : MVM_string_latin1_encode(tc, str, &bytelen, 0);
1361
32.5k
        if (bytelen > 0x3FFFFFFF) {
1362
0
            cleanup_all(vm, ws);
1363
0
            DIE(vm, "String too long for string constants segment");
1364
0
        }
1365
32.5k
1366
32.5k
        /* Ensure we have space. */
1367
22.6k
        align = bytelen & 3 ? 4 - (bytelen & 3) : 0;
1368
32.5k
        need  = 4 + bytelen + align;
1369
32.5k
        if (heap_size + need >= heap_alloc) {
1370
0
            heap_alloc = umax(heap_alloc * 2, heap_size + need);
1371
0
            heap = (char *)MVM_realloc(heap, heap_alloc);
1372
0
        }
1373
32.5k
1374
32.5k
        /* Write byte length and UTF-8 flag into heap. */
1375
32.5k
        write_int32(heap, heap_size, (bytelen << 1) | need_utf8);
1376
32.5k
        heap_size += 4;
1377
32.5k
1378
32.5k
        /* Write string. */
1379
32.5k
        memcpy(heap + heap_size, encoded, bytelen);
1380
32.5k
        MVM_free(encoded);
1381
32.5k
        heap_size += bytelen;
1382
32.5k
1383
32.5k
        /* Add alignment. Whilst we never read this memory, it's useful to
1384
32.5k
           ensure it is initialised, otherwise valgrind (and similar tools)
1385
32.5k
           will rightly complain that we're writing garbage to disk. */
1386
32.5k
        if (align) {
1387
22.6k
            memset(heap + heap_size, 0, align);
1388
22.6k
            heap_size += align;
1389
22.6k
        }
1390
32.5k
    }
1391
1.18k
1392
1.18k
    *string_heap_size = heap_size;
1393
1.18k
    return heap;
1394
1.18k
}
1395
1396
/* Takes all the pieces and forms the bytecode output. */
1397
1.18k
static char * form_bytecode_output(VM, WriterState *ws, unsigned int *bytecode_size) {
1398
1.18k
    MVMuint32     size    = 0;
1399
1.18k
    MVMuint32     pos     = 0;
1400
1.18k
    char         *output;
1401
1.18k
    unsigned int  string_heap_size;
1402
1.18k
    char         *string_heap;
1403
1.18k
    unsigned int  hll_str_idx;
1404
1.18k
1405
1.18k
    /* Store HLL name string, if any. */
1406
1.18k
    if (!VM_STRING_IS_NULL(ws->cu->hll))
1407
1.09k
        hll_str_idx = get_string_heap_index(vm, ws, ws->cu->hll);
1408
1.18k
    else
1409
86
        hll_str_idx = get_string_heap_index(vm, ws, EMPTY_STRING(vm));
1410
1.18k
1411
1.18k
    /* Build string heap. */
1412
1.18k
    string_heap = form_string_heap(vm, ws, &string_heap_size);
1413
1.18k
1414
1.18k
    /* Work out total size. */
1415
1.18k
    size += MVM_ALIGN_SECTION(HEADER_SIZE);
1416
1.18k
    size += MVM_ALIGN_SECTION(string_heap_size);
1417
1.18k
    size += MVM_ALIGN_SECTION(ws->scdep_bytes);
1418
1.18k
    size += MVM_ALIGN_SECTION(ws->extops_bytes);
1419
1.18k
    size += MVM_ALIGN_SECTION(ws->frame_pos);
1420
1.18k
    size += MVM_ALIGN_SECTION(ws->callsite_pos);
1421
1.18k
    size += MVM_ALIGN_SECTION(ws->bytecode_pos);
1422
1.18k
    size += MVM_ALIGN_SECTION(ws->annotation_pos);
1423
1.18k
    if (vm->serialized)
1424
0
        size += MVM_ALIGN_SECTION(vm->serialized_size);
1425
1.18k
1426
1.18k
    /* Allocate space for the bytecode output. */
1427
1.18k
    output = (char *)MVM_calloc(1, size);
1428
1.18k
1429
1.18k
    /* Generate start of header. */
1430
1.18k
    memcpy(output, "MOARVM\r\n", 8);
1431
1.18k
    write_int32(output, 8, BYTECODE_VERSION);
1432
1.18k
    pos += MVM_ALIGN_SECTION(HEADER_SIZE);
1433
1.18k
1434
1.18k
    /* Add SC dependencies section and its header entries. */
1435
1.18k
    write_int32(output, SCDEP_HEADER_OFFSET, pos);
1436
1.18k
    write_int32(output, SCDEP_HEADER_OFFSET + 4, ELEMS(vm, ws->cu->sc_handles));
1437
1.18k
    memcpy(output + pos, ws->scdep_seg, ws->scdep_bytes);
1438
1.18k
    pos += MVM_ALIGN_SECTION(ws->scdep_bytes);
1439
1.18k
1440
1.18k
    /* Add extension ops section and its header entries. */
1441
1.18k
    write_int32(output, EXTOP_HEADER_OFFSET, pos);
1442
1.18k
    write_int32(output, EXTOP_HEADER_OFFSET + 4, ws->num_extops);
1443
1.18k
    memcpy(output + pos, ws->extops_seg, ws->extops_bytes);
1444
1.18k
    pos += MVM_ALIGN_SECTION(ws->extops_bytes);
1445
1.18k
1446
1.18k
    /* Add frames section and its header entries. */
1447
1.18k
    write_int32(output, FRAME_HEADER_OFFSET, pos);
1448
1.18k
    write_int32(output, FRAME_HEADER_OFFSET + 4, ws->num_frames);
1449
1.18k
    memcpy(output + pos, ws->frame_seg, ws->frame_pos);
1450
1.18k
    pos += MVM_ALIGN_SECTION(ws->frame_pos);
1451
1.18k
1452
1.18k
    /* Add callsites section and its header entries. */
1453
1.18k
    write_int32(output, CALLSITE_HEADER_OFFSET, pos);
1454
1.18k
    write_int32(output, CALLSITE_HEADER_OFFSET + 4, ws->num_callsites);
1455
1.18k
    memcpy(output + pos, ws->callsite_seg, ws->callsite_pos);
1456
1.18k
    pos += MVM_ALIGN_SECTION(ws->callsite_pos);
1457
1.18k
1458
1.18k
    /* Add strings heap section and its header entries. */
1459
1.18k
    write_int32(output, STRING_HEADER_OFFSET, pos);
1460
1.18k
    write_int32(output, STRING_HEADER_OFFSET + 4, ELEMS(vm, ws->strings));
1461
1.18k
    memcpy(output + pos, string_heap, string_heap_size);
1462
1.18k
    pos += MVM_ALIGN_SECTION(string_heap_size);
1463
1.18k
    if (string_heap) {
1464
1.18k
        MVM_free(string_heap);
1465
1.18k
        string_heap = NULL;
1466
1.18k
    }
1467
1.18k
1468
1.18k
    /* SC data. Write it if we have it. */
1469
1.18k
    if (vm->serialized) {
1470
0
        write_int32(output, SCDATA_HEADER_OFFSET, pos);
1471
0
        write_int32(output, SCDATA_HEADER_OFFSET + 4, vm->serialized_size);
1472
0
        memcpy(output + pos, vm->serialized, vm->serialized_size);
1473
0
        pos += MVM_ALIGN_SECTION(vm->serialized_size);
1474
0
        MVM_free(vm->serialized);
1475
0
        vm->serialized = NULL;
1476
0
        vm->serialized_size = 0;
1477
0
    }
1478
1.18k
1479
1.18k
    /* Add bytecode section and its header entries (offset, length). */
1480
1.18k
    write_int32(output, BYTECODE_HEADER_OFFSET, pos);
1481
1.18k
    write_int32(output, BYTECODE_HEADER_OFFSET + 4, ws->bytecode_pos);
1482
1.18k
    memcpy(output + pos, ws->bytecode_seg, ws->bytecode_pos);
1483
1.18k
    pos += MVM_ALIGN_SECTION(ws->bytecode_pos);
1484
1.18k
1485
1.18k
    /* Add annotation section and its header entries (offset, length). */
1486
1.18k
    write_int32(output, ANNOTATION_HEADER_OFFSET, pos);
1487
1.18k
    write_int32(output, ANNOTATION_HEADER_OFFSET + 4, ws->annotation_pos);
1488
1.18k
    memcpy(output + pos, ws->annotation_seg, ws->annotation_pos);
1489
1.18k
    pos += MVM_ALIGN_SECTION(ws->annotation_pos);
1490
1.18k
1491
1.18k
    /* Add HLL and special frame indexes. */
1492
1.18k
    write_int32(output, HLL_NAME_HEADER_OFFSET, hll_str_idx);
1493
1.18k
    if (VM_OBJ_IS_NULL(ws->cu->main_frame))
1494
1.05k
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET, 0);
1495
1.18k
    else
1496
130
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET, 1 + get_frame_index(vm, ws, ws->cu->main_frame));
1497
1.18k
    if (VM_OBJ_IS_NULL(ws->cu->load_frame))
1498
1.05k
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 4, 0);
1499
1.18k
    else
1500
130
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 4, 1 + get_frame_index(vm, ws, ws->cu->load_frame));
1501
1.18k
    if (VM_OBJ_IS_NULL(ws->cu->deserialize_frame))
1502
111
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 8, 0);
1503
1.18k
    else
1504
1.07k
        write_int32(output, SPECIAL_FRAME_HEADER_OFFSET + 8, 1 + get_frame_index(vm, ws, ws->cu->deserialize_frame));
1505
1.18k
1506
1.18k
    /* Sanity...should never fail. */
1507
1.18k
    if (pos != size)
1508
0
        DIE(vm, "Bytecode generated did not match expected size");
1509
1.18k
1510
1.18k
    *bytecode_size = size;
1511
1.18k
    return output;
1512
1.18k
}
1513
1514
/* Main entry point to the MAST to bytecode compiler. */
1515
1.18k
char * MVM_mast_compile(VM, MASTNode *node, MASTNodeTypes *types, unsigned int *size) {
1516
1.18k
    MAST_CompUnit  *cu;
1517
1.18k
    WriterState    *ws;
1518
1.18k
    char           *bytecode;
1519
1.18k
    unsigned short  i, num_depscs, num_frames;
1520
1.18k
    unsigned int    bytecode_size;
1521
1.18k
1522
1.18k
    /* Ensure we have a compilation unit. */
1523
1.18k
    if (!ISTYPE(vm, node, types->CompUnit))
1524
0
        DIE(vm, "Top-level MAST node must be a CompUnit");
1525
1.18k
    cu = GET_CompUnit(node);
1526
1.18k
1527
1.18k
    /* Initialize the writer state structure. */
1528
1.18k
    ws = (WriterState *)MVM_malloc(sizeof(WriterState));
1529
1.18k
    ws->types            = types;
1530
1.18k
    ws->strings          = NEWLIST_S(vm);
1531
1.18k
    ws->seen_strings     = NEWHASH(vm);
1532
1.18k
    ws->cur_frame        = NULL;
1533
1.18k
    ws->scdep_bytes      = ELEMS(vm, cu->sc_handles) * SC_DEP_SIZE;
1534
1.10k
    ws->scdep_seg        = ws->scdep_bytes ? (char *)MVM_malloc(ws->scdep_bytes) : NULL;
1535
1.18k
    ws->num_extops       = ELEMS(vm, cu->extop_names);
1536
1.18k
    ws->extops_bytes     = ws->num_extops * EXTOP_SIZE;
1537
1.18k
    ws->extops_seg       = (char *)MVM_malloc(ws->extops_bytes);
1538
1.18k
    ws->frame_pos        = 0;
1539
1.18k
    ws->frame_alloc      = 192 * ELEMS(vm, cu->frames);
1540
1.18k
    ws->frame_seg        = (char *)MVM_malloc(ws->frame_alloc);
1541
1.18k
    ws->num_frames       = 0;
1542
1.18k
    ws->callsite_pos     = 0;
1543
1.18k
    ws->callsite_alloc   = 4096;
1544
1.18k
    ws->callsite_seg     = (char *)MVM_malloc(ws->callsite_alloc);
1545
1.18k
    ws->num_callsites    = 0;
1546
1.18k
    ws->bytecode_pos     = 0;
1547
1.18k
    ws->bytecode_alloc   = 128 * ELEMS(vm, cu->frames);
1548
1.18k
    ws->bytecode_seg     = (char *)MVM_malloc(ws->bytecode_alloc);
1549
1.18k
    ws->annotation_pos   = 0;
1550
1.18k
    ws->annotation_alloc = 64 * ELEMS(vm, cu->frames);
1551
1.18k
    ws->annotation_seg   = (char *)MVM_malloc(ws->annotation_alloc);
1552
1.18k
    ws->cu               = cu;
1553
1.18k
    ws->current_frame_idx= 0;
1554
1.18k
1555
1.18k
    /* If we have any strings from serializing, then we'll seed our own string
1556
1.18k
     * heap with them. This means the compilation unit string heap will align
1557
1.18k
     * perfectly with what the serialization blob needs, and thus we can use
1558
1.18k
     * it in deserialization. Note we use get_string_heap_index for its side
1559
1.18k
     * effects only here. Start from 1, as 0 means NULL string. */
1560
1.18k
    if (vm->serialized_string_heap) {
1561
0
        MVMint64 elems = ELEMS(vm, vm->serialized_string_heap);
1562
0
        for (i = 1; i < elems; i++)
1563
0
            (void)get_string_heap_index(vm, ws, ATPOS_S(vm, vm->serialized_string_heap, i));
1564
0
        vm->serialized_string_heap = NULL;
1565
0
    }
1566
1.18k
1567
1.18k
    /* Initialize callsite reuse cache */
1568
1.18k
    ws->callsite_reuse_head = NULL;
1569
1.18k
1570
1.18k
    /* Store each of the dependent SCs. */
1571
1.18k
    num_depscs = ELEMS(vm, ws->cu->sc_handles);
1572
3.35k
    for (i = 0; i < num_depscs; i++)
1573
2.17k
        write_int32(ws->scdep_seg, i * SC_DEP_SIZE,
1574
2.17k
            get_string_heap_index(vm, ws,
1575
2.17k
                ATPOS_S_C(vm, ws->cu->sc_handles, i)));
1576
1.18k
1577
1.18k
    /* Store each of the extop names and signatures. */
1578
1.18k
    for (i = 0; i < ws->num_extops; i++) {
1579
0
        MASTNode *sig_array;
1580
0
        int num_operands, j;
1581
0
1582
0
        write_int32(ws->extops_seg, i * EXTOP_SIZE,
1583
0
            get_string_heap_index(vm, ws,
1584
0
                ATPOS_S_C(vm, ws->cu->extop_names, i)));
1585
0
1586
0
        sig_array = ATPOS(vm, ws->cu->extop_sigs, i);
1587
0
        num_operands = ELEMS(vm, sig_array);
1588
0
        for (j = 0; j < 8; j++)
1589
0
            write_int8(ws->extops_seg, i * EXTOP_SIZE + 4 + j,
1590
0
                j < num_operands
1591
0
                    ? ATPOS_I(vm, sig_array, j)
1592
0
                    : 0);
1593
0
    }
1594
1.18k
1595
1.18k
    /* Visit and compile each of the frames. */
1596
1.18k
    num_frames = (unsigned short)ELEMS(vm, cu->frames);
1597
5.22k
    for (i = 0; i < num_frames; i++)
1598
4.04k
        compile_frame(vm, ws, ATPOS(vm, cu->frames, i), ws->current_frame_idx = i);
1599
1.18k
1600
1.18k
    /* Join all the pieces into a bytecode file. */
1601
1.18k
    bytecode = form_bytecode_output(vm, ws, &bytecode_size);
1602
1.18k
1603
1.18k
    /* Cleanup and hand back result. */
1604
1.18k
    cleanup_all(vm, ws);
1605
1.18k
1606
1.18k
    *size = bytecode_size;
1607
1.18k
    return bytecode;
1608
1.18k
}