Coverage Report

Created: 2018-07-03 15:31

/home/travis/build/MoarVM/MoarVM/src/6model/reprs/NFA.c
Line
Count
Source (jump to first uncovered line)
1
#include "moar.h"
2
3
/* This representation's function pointer table. */
4
static const MVMREPROps NFA_this_repr;
5
6
/* Creates a new type object of this representation, and associates it with
7
 * the given HOW. */
8
2
static MVMObject * type_object_for(MVMThreadContext *tc, MVMObject *HOW) {
9
2
    MVMSTable *st = MVM_gc_allocate_stable(tc, &NFA_this_repr, HOW);
10
2
11
2
    MVMROOT(tc, st, {
12
2
        MVMObject *obj = MVM_gc_allocate_type_object(tc, st);
13
2
        MVM_ASSIGN_REF(tc, &(st->header), st->WHAT, obj);
14
2
        st->size = sizeof(MVMNFA);
15
2
    });
16
2
17
2
    return st->WHAT;
18
2
}
19
20
/* Copies the body of one object to another. */
21
0
static void copy_to(MVMThreadContext *tc, MVMSTable *st, void *src, MVMObject *dest_root, void *dest) {
22
0
    MVM_exception_throw_adhoc(tc, "Cannot copy object with representation NFA");
23
0
}
24
25
/* Called by the VM to mark any GCable items. */
26
3.02k
static void gc_mark(MVMThreadContext *tc, MVMSTable *st, void *data, MVMGCWorklist *worklist) {
27
3.02k
    MVMNFABody *lb = (MVMNFABody *)data;
28
3.02k
    MVMint64 i, j;
29
3.02k
30
3.02k
    MVM_gc_worklist_add(tc, worklist, &lb->fates);
31
3.02k
32
97.3k
    for (i = 0; i < lb->num_states; i++) {
33
94.3k
        MVMint64 edges = lb->num_state_edges[i];
34
253k
        for (j = 0; j < edges; j++) {
35
159k
            switch (lb->states[i][j].act) {
36
18.2k
                case MVM_NFA_EDGE_CHARLIST:
37
18.2k
                case MVM_NFA_EDGE_CHARLIST_NEG:
38
18.2k
                    MVM_gc_worklist_add(tc, worklist, &lb->states[i][j].arg.s);
39
159k
            }
40
159k
        }
41
94.3k
    }
42
3.02k
}
43
44
/* Called by the VM in order to free memory associated with this object. */
45
0
static void gc_free(MVMThreadContext *tc, MVMObject *obj) {
46
0
    MVMNFA *nfa = (MVMNFA *)obj;
47
0
    MVMint64 i;
48
0
    for (i = 0; i < nfa->body.num_states; i++)
49
0
        if (nfa->body.num_state_edges[i])
50
0
            MVM_fixed_size_free(tc, tc->instance->fsa, nfa->body.num_state_edges[i] * sizeof(MVMNFAStateInfo), nfa->body.states[i]);
51
0
    MVM_fixed_size_free(tc, tc->instance->fsa, nfa->body.num_states * sizeof(MVMNFAStateInfo *), nfa->body.states);
52
0
    MVM_fixed_size_free(tc, tc->instance->fsa, nfa->body.num_states * sizeof(MVMint64), nfa->body.num_state_edges);
53
0
}
54
55
56
static const MVMStorageSpec storage_spec = {
57
    MVM_STORAGE_SPEC_REFERENCE, /* inlineable */
58
    0,                          /* bits */
59
    0,                          /* align */
60
    MVM_STORAGE_SPEC_BP_NONE,   /* boxed_primitive */
61
    0,                          /* can_box */
62
    0,                          /* is_unsigned */
63
};
64
65
66
/* Gets the storage specification for this representation. */
67
0
static const MVMStorageSpec * get_storage_spec(MVMThreadContext *tc, MVMSTable *st) {
68
0
    return &storage_spec;
69
0
}
70
71
/* Serializes the data. */
72
1
static void serialize(MVMThreadContext *tc, MVMSTable *st, void *data, MVMSerializationWriter *writer) {
73
1
    MVMNFABody *body = (MVMNFABody *)data;
74
1
    MVMint64 i, j;
75
1
76
1
    /* Write fates. */
77
1
    MVM_serialization_write_ref(tc, writer, body->fates);
78
1
79
1
    /* Write number of states. */
80
1
    MVM_serialization_write_int(tc, writer, body->num_states);
81
1
82
1
    /* Write state edge list counts, skipping synthetic start node. */
83
5
    for (i = 0; i < body->num_states; i++) {
84
4
        MVMint64 sig_edges = body->num_state_edges[i];
85
4
        if (sig_edges && body->states[i][0].act == MVM_NFA_EDGE_SYNTH_CP_COUNT)
86
0
            sig_edges--;
87
4
        MVM_serialization_write_int(tc, writer, sig_edges);
88
4
    }
89
1
90
1
    /* Write state graph. */
91
5
    for (i = 0; i < body->num_states; i++) {
92
8
        for (j = 0; j < body->num_state_edges[i]; j++) {
93
4
            MVMint64 act = body->states[i][j].act;
94
4
            if (act == MVM_NFA_EDGE_SYNTH_CP_COUNT)
95
0
                continue;
96
4
            MVM_serialization_write_int(tc, writer, act);
97
4
            MVM_serialization_write_int(tc, writer, body->states[i][j].to);
98
4
            switch (act & 0xff) {
99
1
                case MVM_NFA_EDGE_FATE:
100
1
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.i);
101
1
                    break;
102
3
                case MVM_NFA_EDGE_CODEPOINT:
103
3
                case MVM_NFA_EDGE_CODEPOINT_LL:
104
3
                case MVM_NFA_EDGE_CODEPOINT_NEG:
105
3
                case MVM_NFA_EDGE_CODEPOINT_M:
106
3
                case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
107
3
                    MVMGrapheme32 g = body->states[i][j].arg.g;
108
3
                    if (g >= 0) {
109
3
                        /* Non-synthetic. */
110
3
                        MVM_serialization_write_int(tc, writer, g);
111
3
                    }
112
0
                    else {
113
0
                        /* Synthetic. Write the number of codepoints negated,
114
0
                         * and then each of the codepoints. */
115
0
                        MVMNFGSynthetic *si = MVM_nfg_get_synthetic_info(tc, g);
116
0
                        MVMint32 k;
117
0
                        MVM_serialization_write_int(tc, writer, -(si->num_codes));
118
0
                        for (k = 0; k < si->num_codes; k++)
119
0
                            MVM_serialization_write_int(tc, writer, si->codes[k]);
120
0
                    }
121
3
                    break;
122
3
                }
123
0
                case MVM_NFA_EDGE_CHARCLASS:
124
0
                case MVM_NFA_EDGE_CHARCLASS_NEG:
125
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.i);
126
0
                    break;
127
0
                case MVM_NFA_EDGE_CHARLIST:
128
0
                case MVM_NFA_EDGE_CHARLIST_NEG:
129
0
                    MVM_serialization_write_str(tc, writer, body->states[i][j].arg.s);
130
0
                    break;
131
0
                case MVM_NFA_EDGE_CODEPOINT_I:
132
0
                case MVM_NFA_EDGE_CODEPOINT_I_LL:
133
0
                case MVM_NFA_EDGE_CODEPOINT_I_NEG:
134
0
                case MVM_NFA_EDGE_CODEPOINT_IM:
135
0
                case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
136
0
                case MVM_NFA_EDGE_CHARRANGE:
137
0
                case MVM_NFA_EDGE_CHARRANGE_NEG:
138
0
                case MVM_NFA_EDGE_CHARRANGE_M:
139
0
                case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
140
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.uclc.lc);
141
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.uclc.uc);
142
0
                    break;
143
0
                }
144
4
            }
145
4
        }
146
4
    }
147
1
}
148
149
/* Go through each state and see if there are at least four edges involving
150
 * a simple codepoint match. If so, sort them to the start and stick in a
151
 * synthetic edge which indicates the number of codepoint edges ahead. This
152
 * means that our matching can binary search with the codepoint it has, and
153
 * skip over any inappropriate edges. */
154
122k
static int classify_edge(MVMNFAStateInfo *e) {
155
122k
    switch (e->act) {
156
5.83k
        case MVM_NFA_EDGE_SYNTH_CP_COUNT:
157
5.83k
            return 0;
158
91.4k
        case MVM_NFA_EDGE_CODEPOINT:
159
91.4k
        case MVM_NFA_EDGE_CODEPOINT_LL:
160
91.4k
            return 1;
161
24.9k
        default:
162
24.9k
            return 2;
163
122k
    }
164
122k
}
165
61.0k
static int opt_edge_comp(const void *av, const void *bv) {
166
61.0k
    MVMNFAStateInfo *a = (MVMNFAStateInfo *)av;
167
61.0k
    MVMNFAStateInfo *b = (MVMNFAStateInfo *)bv;
168
61.0k
    MVMint32 type_a = classify_edge(a);
169
61.0k
    MVMint32 type_b = classify_edge(b);
170
61.0k
    if (type_a < type_b)
171
8.15k
        return -1;
172
52.9k
    if (type_a > type_b)
173
7.40k
        return 1;
174
45.5k
    if (type_a == 1) {
175
13.6k
        return a->arg.g < b->arg.g ? -1 :
176
24.7k
               a->arg.g > b->arg.g ?  1 :
177
9.13k
                                      0;
178
38.3k
    }
179
7.12k
    else {
180
7.12k
        return 0;
181
7.12k
    }
182
45.5k
}
183
5.20k
static void sort_states_and_add_synth_cp_node(MVMThreadContext *tc, MVMNFABody *body) {
184
5.20k
    MVMint64 s;
185
181k
    for (s = 0; s < body->num_states; s++) {
186
176k
        /* See if there's enough interesting edges to do the opt. */
187
176k
        MVMint32 applicable_edges = 0;
188
176k
        MVMint64 num_orig_edges = body->num_state_edges[s];
189
176k
        if (num_orig_edges >= 4) {
190
8.39k
            MVMint64 e;
191
61.0k
            for (e = 0; e < num_orig_edges; e++) {
192
52.6k
                MVMint64 act = body->states[s][e].act;
193
52.6k
                if (act == MVM_NFA_EDGE_CODEPOINT || act == MVM_NFA_EDGE_CODEPOINT_LL)
194
21.2k
                    applicable_edges++;
195
52.6k
            }
196
8.39k
        }
197
176k
198
176k
        /* If enough edges, insert synthetic and so the sort. */
199
176k
        if (applicable_edges >= 4) {
200
1.82k
            MVMint64 num_new_edges = num_orig_edges + 1;
201
1.82k
            MVMNFAStateInfo *new_edges = MVM_fixed_size_alloc(tc, tc->instance->fsa,
202
1.82k
                num_new_edges * sizeof(MVMNFAStateInfo));
203
1.82k
            new_edges[0].act = MVM_NFA_EDGE_SYNTH_CP_COUNT;
204
1.82k
            new_edges[0].arg.i = applicable_edges;
205
1.82k
            memcpy(new_edges + 1, body->states[s], num_orig_edges * sizeof(MVMNFAStateInfo));
206
1.82k
            qsort(new_edges, num_new_edges, sizeof(MVMNFAStateInfo), opt_edge_comp);
207
1.82k
            MVM_fixed_size_free(tc, tc->instance->fsa, num_orig_edges * sizeof(MVMNFAStateInfo),
208
1.82k
                    body->states[s]);
209
1.82k
            body->states[s] = new_edges;
210
1.82k
            body->num_state_edges[s] = num_new_edges;
211
1.82k
        }
212
176k
    }
213
5.20k
}
214
215
/* Deserializes the data. */
216
1
static void deserialize(MVMThreadContext *tc, MVMSTable *st, MVMObject *root, void *data, MVMSerializationReader *reader) {
217
1
    MVMNFABody *body = (MVMNFABody *)data;
218
1
    MVMint64 i, j;
219
1
220
1
    /* Read fates. */
221
1
    body->fates = MVM_serialization_read_ref(tc, reader);
222
1
223
1
    /* Read number of states. */
224
1
    body->num_states = MVM_serialization_read_int(tc, reader);
225
1
226
1
    if (body->num_states > 0) {
227
1
        /* Read state edge list counts. */
228
1
        body->num_state_edges = MVM_fixed_size_alloc(tc, tc->instance->fsa, body->num_states * sizeof(MVMint64));
229
5
        for (i = 0; i < body->num_states; i++)
230
4
            body->num_state_edges[i] = MVM_serialization_read_int(tc, reader);
231
1
232
1
        /* Read state graph. */
233
1
        body->states = MVM_fixed_size_alloc(tc, tc->instance->fsa, body->num_states * sizeof(MVMNFAStateInfo *));
234
5
        for (i = 0; i < body->num_states; i++) {
235
4
            MVMint64 edges = body->num_state_edges[i];
236
4
            if (edges > 0) {
237
4
                body->states[i] = MVM_fixed_size_alloc(tc, tc->instance->fsa, edges * sizeof(MVMNFAStateInfo));
238
4
            }
239
8
            for (j = 0; j < edges; j++) {
240
4
                body->states[i][j].act = MVM_serialization_read_int(tc, reader);
241
4
                body->states[i][j].to = MVM_serialization_read_int(tc, reader);
242
4
                switch (body->states[i][j].act & 0xff) {
243
1
                    case MVM_NFA_EDGE_FATE:
244
1
                        body->states[i][j].arg.i = MVM_serialization_read_int(tc, reader);
245
1
                        break;
246
3
                    case MVM_NFA_EDGE_CODEPOINT:
247
3
                    case MVM_NFA_EDGE_CODEPOINT_LL:
248
3
                    case MVM_NFA_EDGE_CODEPOINT_NEG:
249
3
                    case MVM_NFA_EDGE_CODEPOINT_M:
250
3
                    case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
251
3
                        MVMint64 cp_or_synth_count = MVM_serialization_read_int(tc, reader);
252
3
                        if (cp_or_synth_count >= 0) {
253
3
                            body->states[i][j].arg.g = (MVMGrapheme32)cp_or_synth_count;
254
3
                        }
255
0
                        else {
256
0
                            MVMint32 num_codes = -cp_or_synth_count;
257
0
                            MVMCodepoint *codes = MVM_fixed_size_alloc(tc, tc->instance->fsa, num_codes * sizeof(MVMCodepoint));
258
0
                            MVMint32 k;
259
0
                            for (k = 0; k < num_codes; k++)
260
0
                                codes[k] = (MVMCodepoint)MVM_serialization_read_int(tc, reader);
261
0
                            body->states[i][j].arg.g = MVM_nfg_codes_to_grapheme(tc, codes, num_codes);
262
0
                            MVM_fixed_size_free(tc, tc->instance->fsa, num_codes * sizeof(MVMCodepoint), codes);
263
0
                        }
264
3
                        break;
265
3
                    }
266
0
                    case MVM_NFA_EDGE_CHARCLASS:
267
0
                    case MVM_NFA_EDGE_CHARCLASS_NEG:
268
0
                        body->states[i][j].arg.i = MVM_serialization_read_int(tc, reader);
269
0
                        break;
270
0
                    case MVM_NFA_EDGE_CHARLIST:
271
0
                    case MVM_NFA_EDGE_CHARLIST_NEG:
272
0
                        MVM_ASSIGN_REF(tc, &(root->header), body->states[i][j].arg.s, MVM_serialization_read_str(tc, reader));
273
0
                        break;
274
0
                    case MVM_NFA_EDGE_CODEPOINT_I:
275
0
                    case MVM_NFA_EDGE_CODEPOINT_I_LL:
276
0
                    case MVM_NFA_EDGE_CODEPOINT_I_NEG:
277
0
                    case MVM_NFA_EDGE_CODEPOINT_IM:
278
0
                    case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
279
0
                    case MVM_NFA_EDGE_CHARRANGE:
280
0
                    case MVM_NFA_EDGE_CHARRANGE_NEG:
281
0
                    case MVM_NFA_EDGE_CHARRANGE_M:
282
0
                    case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
283
0
                        body->states[i][j].arg.uclc.lc = MVM_serialization_read_int(tc, reader);
284
0
                        body->states[i][j].arg.uclc.uc = MVM_serialization_read_int(tc, reader);
285
0
                        break;
286
0
                    }
287
4
                }
288
4
            }
289
4
        }
290
1
    }
291
1
292
1
    sort_states_and_add_synth_cp_node(tc, body);
293
1
}
294
295
/* Compose the representation. */
296
2
static void compose(MVMThreadContext *tc, MVMSTable *st, MVMObject *info) {
297
2
    /* Nothing to do for this REPR. */
298
2
}
299
300
/* Set the size of the STable. */
301
144
static void deserialize_stable_size(MVMThreadContext *tc, MVMSTable *st, MVMSerializationReader *reader) {
302
144
    st->size = sizeof(MVMNFA);
303
144
}
304
305
/* Calculates the non-GC-managed memory we hold on to. */
306
1.05k
static MVMuint64 unmanaged_size(MVMThreadContext *tc, MVMSTable *st, void *data) {
307
1.05k
    MVMNFABody *body = (MVMNFABody *)data;
308
1.05k
    MVMuint64 total;
309
1.05k
    MVMint64 i;
310
1.05k
311
1.05k
    total = body->num_states * sizeof(MVMint64); /* for num_state_edges */
312
1.05k
    total += body->num_states * sizeof(MVMNFAStateInfo *); /* for states level 1 */
313
33.6k
    for (i = 0; i < body->num_states; i++)
314
32.6k
        total += body->num_state_edges[i] * sizeof(MVMNFAStateInfo);
315
1.05k
316
1.05k
    return total;
317
1.05k
}
318
319
/* Initializes the representation. */
320
144
const MVMREPROps * MVMNFA_initialize(MVMThreadContext *tc) {
321
144
    return &NFA_this_repr;
322
144
}
323
324
static const MVMREPROps NFA_this_repr = {
325
    type_object_for,
326
    MVM_gc_allocate_object,
327
    NULL, /* initialize */
328
    copy_to,
329
    MVM_REPR_DEFAULT_ATTR_FUNCS,
330
    MVM_REPR_DEFAULT_BOX_FUNCS,
331
    MVM_REPR_DEFAULT_POS_FUNCS,
332
    MVM_REPR_DEFAULT_ASS_FUNCS,
333
    MVM_REPR_DEFAULT_ELEMS,
334
    get_storage_spec,
335
    NULL, /* change_type */
336
    serialize,
337
    deserialize,
338
    NULL, /* serialize_repr_data */
339
    NULL, /* deserialize_repr_data */
340
    deserialize_stable_size,
341
    gc_mark,
342
    gc_free,
343
    NULL, /* gc_cleanup */
344
    NULL, /* gc_mark_repr_data */
345
    NULL, /* gc_free_repr_data */
346
    compose,
347
    NULL, /* spesh */
348
    "NFA", /* name */
349
    MVM_REPR_ID_NFA,
350
    unmanaged_size,
351
    NULL, /* describe_refs */
352
};
353
354
/* We may be provided a grapheme as a codepoint for non-synthetics, or as a
355
 * 1-char string for synthetics. */
356
125k
static MVMGrapheme32 get_grapheme(MVMThreadContext *tc, MVMObject *obj) {
357
125k
    /* Handle null and non-concrete case. */
358
125k
    if (MVM_UNLIKELY(MVM_is_null(tc, obj) || !IS_CONCRETE(obj))) {
359
0
        MVM_exception_throw_adhoc(tc,
360
0
            "NFA must be provided with a concrete string or integer for graphemes");
361
0
    }
362
125k
363
125k
    /* Otherwise, guess something appropriate. */
364
125k
    else {
365
125k
        const MVMStorageSpec *ss = REPR(obj)->get_storage_spec(tc, STABLE(obj));
366
125k
        if (ss->can_box & MVM_STORAGE_SPEC_CAN_BOX_INT)
367
125k
            return REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj));
368
0
        else if (ss->can_box & MVM_STORAGE_SPEC_CAN_BOX_STR)
369
0
            return MVM_string_get_grapheme_at(tc,
370
0
                REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj)),
371
0
                0);
372
0
        else
373
0
            MVM_exception_throw_adhoc(tc,
374
0
                "NFA must be provided with a string or integer for graphemes");
375
125k
    }
376
125k
}
377
378
5.20k
MVMObject * MVM_nfa_from_statelist(MVMThreadContext *tc, MVMObject *states, MVMObject *nfa_type) {
379
5.20k
    MVMObject  *nfa_obj;
380
5.20k
    MVMNFABody *nfa;
381
5.20k
    MVMint64    i, j, num_states;
382
5.20k
383
5.20k
    MVMROOT2(tc, states, nfa_type, {
384
5.20k
        /* Create NFA object. */
385
5.20k
        nfa_obj = MVM_repr_alloc_init(tc, nfa_type);
386
5.20k
        nfa = (MVMNFABody *)OBJECT_BODY(nfa_obj);
387
5.20k
388
5.20k
        /* The first state entry is the fates list. */
389
5.20k
        nfa->fates = MVM_repr_at_pos_o(tc, states, 0);
390
5.20k
391
5.20k
        /* Go over the rest and convert to the NFA. */
392
5.20k
        num_states = MVM_repr_elems(tc, states) - 1;
393
5.20k
        nfa->num_states = num_states;
394
5.20k
        if (num_states > 0) {
395
5.20k
            nfa->num_state_edges = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa, num_states * sizeof(MVMint64));
396
5.20k
            nfa->states = MVM_fixed_size_alloc_zeroed(tc, tc->instance->fsa, num_states * sizeof(MVMNFAStateInfo *));
397
5.20k
        }
398
5.20k
        for (i = 0; i < num_states; i++) {
399
5.20k
            MVMObject *edge_info = MVM_repr_at_pos_o(tc, states, i + 1);
400
5.20k
            MVMint64   elems     = MVM_repr_elems(tc, edge_info);
401
5.20k
            MVMint64   edges     = elems / 3;
402
5.20k
            MVMint64   cur_edge  = 0;
403
5.20k
404
5.20k
            nfa->num_state_edges[i] = edges;
405
5.20k
            if (edges > 0) {
406
5.20k
                nfa->states[i] = MVM_fixed_size_alloc(tc, tc->instance->fsa, edges * sizeof(MVMNFAStateInfo));
407
5.20k
            }
408
5.20k
409
5.20k
            for (j = 0; j < elems; j += 3) {
410
5.20k
                MVMint64 act = MVM_coerce_simple_intify(tc,
411
5.20k
                    MVM_repr_at_pos_o(tc, edge_info, j));
412
5.20k
                MVMint64 to  = MVM_coerce_simple_intify(tc,
413
5.20k
                    MVM_repr_at_pos_o(tc, edge_info, j + 2));
414
5.20k
                if (to <= 0 && act != MVM_NFA_EDGE_FATE)
415
5.20k
                    MVM_exception_throw_adhoc(tc, "Invalid to edge %"PRId64" in NFA statelist", to);
416
5.20k
417
5.20k
                nfa->states[i][cur_edge].act = act;
418
5.20k
                nfa->states[i][cur_edge].to = to;
419
5.20k
420
5.20k
                switch (act & 0xff) {
421
5.20k
                case MVM_NFA_EDGE_FATE:
422
5.20k
                    nfa->states[i][cur_edge].arg.i = MVM_coerce_simple_intify(tc,
423
5.20k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
424
5.20k
                    break;
425
5.20k
                case MVM_NFA_EDGE_CODEPOINT:
426
5.20k
                case MVM_NFA_EDGE_CODEPOINT_LL:
427
5.20k
                case MVM_NFA_EDGE_CODEPOINT_NEG:
428
5.20k
                case MVM_NFA_EDGE_CODEPOINT_M:
429
5.20k
                case MVM_NFA_EDGE_CODEPOINT_M_NEG:
430
5.20k
                    nfa->states[i][cur_edge].arg.g = get_grapheme(tc,
431
5.20k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
432
5.20k
                    break;
433
5.20k
                case MVM_NFA_EDGE_CHARCLASS:
434
5.20k
                case MVM_NFA_EDGE_CHARCLASS_NEG:
435
5.20k
                    nfa->states[i][cur_edge].arg.i = MVM_coerce_simple_intify(tc,
436
5.20k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
437
5.20k
                    break;
438
5.20k
                case MVM_NFA_EDGE_CHARLIST:
439
5.20k
                case MVM_NFA_EDGE_CHARLIST_NEG:
440
5.20k
                    MVM_ASSIGN_REF(tc, &(nfa_obj->header),
441
5.20k
                        nfa->states[i][cur_edge].arg.s,
442
5.20k
                        MVM_repr_get_str(tc, MVM_repr_at_pos_o(tc, edge_info, j + 1)));
443
5.20k
                    break;
444
5.20k
                case MVM_NFA_EDGE_CODEPOINT_I:
445
5.20k
                case MVM_NFA_EDGE_CODEPOINT_I_LL:
446
5.20k
                case MVM_NFA_EDGE_CODEPOINT_I_NEG:
447
5.20k
                case MVM_NFA_EDGE_CODEPOINT_IM:
448
5.20k
                case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
449
5.20k
                /* That is not about uppercase/lowercase here, but lower and upper bounds
450
5.20k
                   of our range. */
451
5.20k
                case MVM_NFA_EDGE_CHARRANGE:
452
5.20k
                case MVM_NFA_EDGE_CHARRANGE_NEG:
453
5.20k
                case MVM_NFA_EDGE_CHARRANGE_M:
454
5.20k
                case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
455
5.20k
                    MVMObject *arg = MVM_repr_at_pos_o(tc, edge_info, j + 1);
456
5.20k
                    nfa->states[i][cur_edge].arg.uclc.lc = MVM_coerce_simple_intify(tc,
457
5.20k
                        MVM_repr_at_pos_o(tc, arg, 0));
458
5.20k
                    nfa->states[i][cur_edge].arg.uclc.uc = MVM_coerce_simple_intify(tc,
459
5.20k
                        MVM_repr_at_pos_o(tc, arg, 1));
460
5.20k
                    break;
461
5.20k
                }
462
5.20k
                }
463
5.20k
464
5.20k
                cur_edge++;
465
5.20k
            }
466
5.20k
        }
467
5.20k
    });
468
5.20k
469
5.20k
    sort_states_and_add_synth_cp_node(tc, nfa);
470
5.20k
471
5.20k
    return nfa_obj;
472
5.20k
}
473
474
/* Does a run of the NFA. Produces a list of integers indicating the
475
 * chosen ordering. */
476
6.52M
static MVMint32 in_done(MVMuint32 *done, MVMuint32 numdone, MVMuint32 st) {
477
6.52M
    MVMuint32 i = 0;
478
26.7M
    for (i = 0; i < numdone; i++)
479
20.2M
        if (done[i] == st)
480
12.3k
            return 1;
481
6.51M
    return 0;
482
6.52M
}
483
484
713k
static MVMint64 * nqp_nfa_run(MVMThreadContext *tc, MVMNFABody *nfa, MVMString *target, MVMint64 offset, MVMint64 *total_fates_out) {
485
713k
    MVMint64  eos     = MVM_string_graphs(tc, target);
486
713k
    MVMuint32 gen     = 1;
487
713k
    MVMint64  numcur  = 0;
488
713k
    MVMint64  numnext = 0;
489
713k
    MVMint64  numdone = 0;
490
713k
    MVMuint32 *done, *curst, *nextst;
491
713k
    MVMint64  *fates, *longlit;
492
713k
    MVMint64  i, fate_arr_len, num_states, total_fates, prev_fates, usedlonglit;
493
713k
    MVMint64  orig_offset = offset;
494
713k
    /* We used a cached grapheme iterator since we often request the same
495
713k
     * grapheme multiple times, most common after that is requesting the next
496
713k
     * grapheme. */
497
713k
    MVMGraphemeIter_cached gic;
498
713k
    int nfadeb = tc->instance->nfa_debug_enabled;
499
713k
500
713k
    /* Obtain or (re)allocate "done states", "current states" and "next
501
713k
     * states" arrays. */
502
713k
    num_states = nfa->num_states;
503
713k
    if (tc->nfa_alloc_states < num_states) {
504
287
        size_t alloc   = (num_states + 1) * sizeof(MVMuint32);
505
287
        tc->nfa_done   = (MVMuint32 *)MVM_realloc(tc->nfa_done, alloc);
506
287
        tc->nfa_curst  = (MVMuint32 *)MVM_realloc(tc->nfa_curst, alloc);
507
287
        tc->nfa_nextst = (MVMuint32 *)MVM_realloc(tc->nfa_nextst, alloc);
508
287
        tc->nfa_alloc_states = num_states;
509
287
    }
510
713k
    done   = tc->nfa_done;
511
713k
    curst  = tc->nfa_curst;
512
713k
    nextst = tc->nfa_nextst;
513
713k
514
713k
    /* Allocate fates array. */
515
713k
    fate_arr_len = 1 + MVM_repr_elems(tc, nfa->fates);
516
713k
    if (tc->nfa_fates_len < fate_arr_len) {
517
587
        tc->nfa_fates     = (MVMint64 *)MVM_realloc(tc->nfa_fates, sizeof(MVMint64) * fate_arr_len);
518
587
        tc->nfa_fates_len = fate_arr_len;
519
587
    }
520
713k
    fates = tc->nfa_fates;
521
713k
    total_fates = 0;
522
713k
    if (MVM_UNLIKELY(nfadeb)) fprintf(stderr,"======================================\nStarting with %d fates in %d states\n", (int)fate_arr_len, (int)num_states) ;
523
713k
524
713k
    /* longlit will be updated on a fate whenever NFA passes through final char of a literal. */
525
713k
    /* These edges are specially marked to indicate which fate they influence the fate of. */
526
713k
    if (tc->nfa_longlit_len < fate_arr_len) {
527
587
        tc->nfa_longlit = (MVMint64 *)MVM_realloc(tc->nfa_longlit, sizeof(MVMint64) * fate_arr_len);
528
587
        tc->nfa_longlit_len  = fate_arr_len;
529
587
    }
530
713k
    longlit = tc->nfa_longlit;
531
713k
    usedlonglit = 0;
532
713k
533
713k
    nextst[numnext++] = 1;
534
713k
    /* In NFA could be called with a string that has 0 graphemes in it. Guard
535
713k
     * against this so we don't init for the empty string. */
536
713k
    if (target->body.num_graphs) MVM_string_gi_cached_init(tc, &gic, target, 0);
537
713k
538
2.32M
    while (numnext && offset <= eos) {
539
1.61M
        /* Swap next and current */
540
1.61M
        MVMuint32 *temp = curst;
541
1.61M
        curst   = nextst;
542
1.61M
        nextst  = temp;
543
1.61M
        numcur  = numnext;
544
1.61M
        numnext = 0;
545
1.61M
        numdone = 0;
546
1.61M
547
1.61M
        /* Save how many fates we have before this position is considered. */
548
1.61M
        prev_fates = total_fates;
549
1.61M
550
1.61M
        if (MVM_UNLIKELY(nfadeb)) {
551
0
            if (offset < eos) {
552
0
                MVMGrapheme32 cp = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
553
0
                fprintf(stderr,"%c with %ds target %lx offset %"PRId64"\n",cp,(int)numcur, (long)target, offset);
554
0
            }
555
0
            else {
556
0
                fprintf(stderr,"EOS with %ds\n",(int)numcur);
557
0
            }
558
0
        }
559
5.93M
        while (numcur) {
560
4.32M
            MVMNFAStateInfo *edge_info;
561
4.32M
            MVMint64         edge_info_elems;
562
4.32M
563
4.32M
            MVMint64 st = curst[--numcur];
564
4.32M
            if (st <= num_states) {
565
4.32M
                if (in_done(done, numdone, st))
566
5.89k
                    continue;
567
4.31M
                done[numdone++] = st;
568
4.31M
            }
569
4.32M
570
4.31M
            edge_info = nfa->states[st - 1];
571
4.31M
            edge_info_elems = nfa->num_state_edges[st - 1];
572
4.31M
            if (MVM_UNLIKELY(nfadeb))
573
0
                fprintf(stderr,"\t%"PRIi64"\t%"PRIi64"\t", st, edge_info_elems);
574
15.0M
            for (i = 0; i < edge_info_elems; i++) {
575
10.7M
                MVMint64 act = edge_info[i].act;
576
10.7M
                MVMint64 to  = edge_info[i].to;
577
10.7M
578
10.7M
                /* All the special cases are under one test. */
579
10.7M
                if (act <= MVM_NFA_EDGE_EPSILON) {
580
4.57M
                    if (act < 0) {
581
1.45M
                        /* Negative indicates a fate is encoded in the act of the codepoint edge. */
582
1.45M
                        /* These will redispatch to one of the _LL cases below */
583
1.45M
                        act &= 0xff;
584
1.45M
                    }
585
3.12M
                    else if (act == MVM_NFA_EDGE_FATE) {
586
926k
                        /* Crossed a fate edge. Check if we already saw this fate, and
587
926k
                         * if so remove the entry so we can re-add at the new token length. */
588
926k
                        MVMint64 arg = edge_info[i].arg.i;
589
926k
                        MVMint64 j;
590
926k
                        MVMint64 found_fate = 0;
591
926k
                        if (MVM_UNLIKELY(nfadeb))
592
0
                            fprintf(stderr, "fate(%016llx) ", (long long unsigned int)arg);
593
1.75M
                        for (j = 0; j < total_fates; j++) {
594
826k
                            if (found_fate)
595
80.4k
                                fates[j - found_fate] = fates[j];
596
826k
                            if ((fates[j] & 0xffffff) == arg) {
597
572k
                                found_fate++;
598
572k
                                if (j < prev_fates)
599
560k
                                    prev_fates--;
600
572k
                            }
601
826k
                        }
602
926k
                        total_fates -= found_fate;
603
926k
                        if (arg < usedlonglit)
604
155k
                            arg -= longlit[arg] << 24;
605
926k
                        if (MVM_UNLIKELY(++total_fates > fate_arr_len)) {
606
0
                            /* should never happen if nfa->fates is correct and dedup above works right */
607
0
                            fprintf(stderr, "oops adding %016llx to\n", (long long unsigned int)arg);
608
0
                            for (j = 0; j < total_fates - 1; j++) {
609
0
                                fprintf(stderr, "  %016llx\n", (long long unsigned int)fates[j]);
610
0
                            }
611
0
                            fate_arr_len      = total_fates + 10;
612
0
                            tc->nfa_fates     = (MVMint64 *)MVM_realloc(tc->nfa_fates,
613
0
                                sizeof(MVMint64) * fate_arr_len);
614
0
                            tc->nfa_fates_len = fate_arr_len;
615
0
                            fates             = tc->nfa_fates;
616
0
                        }
617
926k
                        /* a small insertion sort */
618
926k
                        j = total_fates - 1;
619
982k
                        while (--j >= prev_fates && fates[j] < arg) {
620
56.4k
                            fates[j + 1] = fates[j];
621
56.4k
                        }
622
926k
                        fates[++j] = arg;
623
926k
                        continue;
624
926k
                    }
625
2.20M
                    else if (act == MVM_NFA_EDGE_EPSILON && to <= num_states &&
626
2.20M
                            !in_done(done, numdone, to)) {
627
2.19M
                        if (to)
628
2.19M
                            curst[numcur++] = to;
629
0
                        else if (MVM_UNLIKELY(nfadeb))  /* XXX should turn into a "can't happen" after rebootstrap */
630
0
                            fprintf(stderr, "  oops, ignoring epsilon to 0\n");
631
2.19M
                        continue;
632
2.19M
                    }
633
4.57M
                }
634
10.7M
635
7.64M
                if (eos <= offset) {
636
24.0k
                    /* Can't match, so drop state. */
637
24.0k
                    continue;
638
24.0k
                }
639
7.62M
                else {
640
7.62M
                    switch (act) {
641
1.45M
                        case MVM_NFA_EDGE_CODEPOINT_LL: {
642
1.45M
                            const MVMGrapheme32 arg = edge_info[i].arg.g;
643
1.45M
                            if (MVM_string_gi_cached_get_grapheme(tc, &gic, offset) == arg) {
644
66.9k
                                MVMint64 fate = (edge_info[i].act >> 8) & 0xfffff;
645
66.9k
                                nextst[numnext++] = to;
646
388k
                                while (usedlonglit <= fate)
647
321k
                                    longlit[usedlonglit++] = 0;
648
66.9k
                                longlit[fate] = offset - orig_offset + 1;
649
66.9k
                                if (MVM_UNLIKELY(nfadeb))
650
0
                                    fprintf(stderr, "%d->%d ", (int)i, (int)to);
651
66.9k
                            }
652
1.45M
                            continue;
653
1.45M
                        }
654
2.40M
                        case MVM_NFA_EDGE_CODEPOINT: {
655
2.40M
                            const MVMGrapheme32 arg = edge_info[i].arg.g;
656
2.40M
                            if (MVM_string_gi_cached_get_grapheme(tc, &gic, offset) == arg) {
657
312k
                                nextst[numnext++] = to;
658
312k
                                if (MVM_UNLIKELY(nfadeb))
659
0
                                    fprintf(stderr, "%d->%d ", (int)i, (int)to);
660
312k
                            }
661
2.40M
                            continue;
662
1.45M
                        }
663
0
                        case MVM_NFA_EDGE_CODEPOINT_NEG: {
664
0
                            const MVMGrapheme32 arg = edge_info[i].arg.g;
665
0
                            if (MVM_string_gi_cached_get_grapheme(tc, &gic, offset) != arg)
666
0
                                nextst[numnext++] = to;
667
0
                            continue;
668
1.45M
                        }
669
1.09M
                        case MVM_NFA_EDGE_CHARCLASS: {
670
1.09M
                            const MVMint64 arg = edge_info[i].arg.i;
671
1.09M
                            if (MVM_string_grapheme_is_cclass(tc, arg, MVM_string_gi_cached_get_grapheme(tc, &gic, offset)))
672
565k
                                nextst[numnext++] = to;
673
1.09M
                            continue;
674
1.45M
                        }
675
110k
                        case MVM_NFA_EDGE_CHARCLASS_NEG: {
676
110k
                            const MVMint64 arg = edge_info[i].arg.i;
677
110k
                            if (!MVM_string_grapheme_is_cclass(tc, arg, MVM_string_gi_cached_get_grapheme(tc, &gic, offset)))
678
106k
                                nextst[numnext++] = to;
679
110k
                            continue;
680
1.45M
                        }
681
2.15M
                        case MVM_NFA_EDGE_CHARLIST: {
682
2.15M
                            MVMString *arg   = edge_info[i].arg.s;
683
2.15M
                            MVMGrapheme32 cp = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
684
2.15M
                            if (MVM_string_index_of_grapheme(tc, arg, cp) >= 0)
685
295k
                                nextst[numnext++] = to;
686
2.15M
                            continue;
687
1.45M
                        }
688
2.91k
                        case MVM_NFA_EDGE_CHARLIST_NEG: {
689
2.91k
                            MVMString *arg    = edge_info[i].arg.s;
690
2.91k
                            const MVMGrapheme32 cp = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
691
2.91k
                            if (MVM_string_index_of_grapheme(tc, arg, cp) < 0)
692
2.32k
                                nextst[numnext++] = to;
693
2.91k
                            continue;
694
1.45M
                        }
695
0
                        case MVM_NFA_EDGE_CODEPOINT_I_LL: {
696
0
                            const MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
697
0
                            const MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
698
0
                            const MVMGrapheme32 ord    = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
699
0
                            if (ord == lc_arg || ord == uc_arg) {
700
0
                                MVMint64 fate = (edge_info[i].act >> 8) & 0xfffff;
701
0
                                nextst[numnext++] = to;
702
0
                                while (usedlonglit <= fate)
703
0
                                    longlit[usedlonglit++] = 0;
704
0
                                longlit[fate] = offset - orig_offset + 1;
705
0
                            }
706
0
                            continue;
707
1.45M
                        }
708
0
                        case MVM_NFA_EDGE_CODEPOINT_I: {
709
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
710
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
711
0
                            MVMGrapheme32 ord    = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
712
0
                            if (ord == lc_arg || ord == uc_arg)
713
0
                                nextst[numnext++] = to;
714
0
                            continue;
715
1.45M
                        }
716
0
                        case MVM_NFA_EDGE_CODEPOINT_I_NEG: {
717
0
                            const MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
718
0
                            const MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
719
0
                            const MVMGrapheme32 ord    = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
720
0
                            if (ord != lc_arg && ord != uc_arg)
721
0
                                nextst[numnext++] = to;
722
0
                            continue;
723
1.45M
                        }
724
12.3k
                        case MVM_NFA_EDGE_CHARRANGE: {
725
12.3k
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
726
12.3k
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
727
12.3k
                            MVMGrapheme32 ord    = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
728
12.3k
                            if (ord >= lc_arg && ord <= uc_arg)
729
1.37k
                                nextst[numnext++] = to;
730
12.3k
                            continue;
731
1.45M
                        }
732
0
                        case MVM_NFA_EDGE_CHARRANGE_NEG: {
733
0
                            const MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
734
0
                            const MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
735
0
                            const MVMGrapheme32 ord    = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
736
0
                            if (ord < lc_arg || uc_arg < ord)
737
0
                                nextst[numnext++] = to;
738
0
                            continue;
739
1.45M
                        }
740
0
                        case MVM_NFA_EDGE_SUBRULE:
741
0
                            if (MVM_UNLIKELY(nfadeb))
742
0
                                fprintf(stderr, "IGNORING SUBRULE\n");
743
0
                            continue;
744
0
                        case MVM_NFA_EDGE_CODEPOINT_M:
745
0
                        case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
746
0
                            MVMNormalizer norm;
747
0
                            MVMint32 ready;
748
0
                            MVMGrapheme32 ga = edge_info[i].arg.g;
749
0
                            MVMGrapheme32 gb = MVM_string_ord_basechar_at(tc, target, offset);
750
0
751
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
752
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, ga, &ga);
753
0
                            MVM_unicode_normalizer_eof(tc, &norm);
754
0
                            if (!ready)
755
0
                                ga = MVM_unicode_normalizer_get_grapheme(tc, &norm);
756
0
757
0
                            if (((act == MVM_NFA_EDGE_CODEPOINT_M)     && (ga == gb))
758
0
                             || ((act == MVM_NFA_EDGE_CODEPOINT_M_NEG) && (ga != gb)))
759
0
                                nextst[numnext++] = to;
760
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
761
0
                            continue;
762
0
                        }
763
0
                        case MVM_NFA_EDGE_CODEPOINT_IM:
764
0
                        case MVM_NFA_EDGE_CODEPOINT_IM_NEG: {
765
0
                            MVMNormalizer norm;
766
0
                            MVMint32 ready;
767
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
768
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
769
0
                            const MVMGrapheme32 ord = MVM_string_ord_basechar_at(tc, target, offset);
770
0
771
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
772
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, uc_arg, &uc_arg);
773
0
                            MVM_unicode_normalizer_eof(tc, &norm);
774
0
                            if (!ready)
775
0
                                uc_arg = MVM_unicode_normalizer_get_grapheme(tc, &norm);
776
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
777
0
778
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
779
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, lc_arg, &lc_arg);
780
0
                            MVM_unicode_normalizer_eof(tc, &norm);
781
0
                            if (!ready)
782
0
                                lc_arg = MVM_unicode_normalizer_get_grapheme(tc, &norm);
783
0
784
0
                            if (((act == MVM_NFA_EDGE_CODEPOINT_IM)     && (ord == lc_arg || ord == uc_arg))
785
0
                             || ((act == MVM_NFA_EDGE_CODEPOINT_IM_NEG) && (ord != lc_arg && ord != uc_arg)))
786
0
                                nextst[numnext++] = to;
787
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
788
0
                            continue;
789
0
                        }
790
0
                        case MVM_NFA_EDGE_CHARRANGE_M: {
791
0
                            const MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
792
0
                            const MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
793
0
                            const MVMGrapheme32 ord    = MVM_string_ord_basechar_at(tc, target, offset);
794
0
                            if (ord >= lc_arg && ord <= uc_arg)
795
0
                                nextst[numnext++] = to;
796
0
                            continue;
797
0
                        }
798
0
                        case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
799
0
                            const MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
800
0
                            const MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
801
0
                            const MVMGrapheme32 ord    = MVM_string_ord_basechar_at(tc, target, offset);
802
0
                            if (ord < lc_arg || uc_arg < ord)
803
0
                                nextst[numnext++] = to;
804
0
                            continue;
805
0
                        }
806
380k
                        case MVM_NFA_EDGE_SYNTH_CP_COUNT: {
807
380k
                            /* Binary search the edges ahead for the grapheme. */
808
380k
                            const MVMGrapheme32 search = MVM_string_gi_cached_get_grapheme(tc, &gic, offset);
809
380k
                            const MVMint64 num_possibilities = edge_info[i].arg.i;
810
380k
                            const MVMint64 end = i + num_possibilities;
811
380k
                            MVMint64 l = i + 1;
812
380k
                            MVMint64 r = end;
813
380k
                            MVMint64 found = -1;
814
1.61M
                            while (l <= r) {
815
1.27M
                                const MVMint64 m = l + (r - l) / 2;
816
1.27M
                                const MVMGrapheme32 test = edge_info[m].arg.g;
817
1.27M
                                if (test == search) {
818
39.8k
                                    /* We found it, but important we get the first edge
819
39.8k
                                     * that matches. */
820
39.8k
                                    found = m;
821
55.7k
                                    while (found > i + 1 && edge_info[found - 1].arg.g == search)
822
15.8k
                                        found--;
823
39.8k
                                    break;
824
39.8k
                                }
825
1.23M
                                if (test < search)
826
675k
                                    l = m + 1;
827
1.23M
                                else
828
562k
                                    r = m - 1;
829
1.23M
                            }
830
380k
                            if (found == -1) {
831
340k
                                /* Binary search failed to find a match, so just skip all
832
340k
                                 * the nodes. */
833
340k
                                i += num_possibilities;
834
340k
                            }
835
39.8k
                            else {
836
39.8k
                                /* Add all states that match. */
837
100k
                                while (found <= end && edge_info[found].arg.g == search) {
838
60.4k
                                    to = edge_info[found].to;
839
60.4k
                                    if (edge_info[found].act == MVM_NFA_EDGE_CODEPOINT) {
840
60.4k
                                        nextst[numnext++] = to;
841
60.4k
                                        if (MVM_UNLIKELY(nfadeb))
842
0
                                            fprintf(stderr, "%d->%d ", (int)found, (int)to);
843
60.4k
                                    }
844
0
                                    else {
845
0
                                        const MVMint64 fate = (edge_info[found].act >> 8) & 0xfffff;
846
0
                                        nextst[numnext++] = to;
847
0
                                        while (usedlonglit <= fate)
848
0
                                            longlit[usedlonglit++] = 0;
849
0
                                        longlit[fate] = offset - orig_offset + 1;
850
0
                                        if (MVM_UNLIKELY(nfadeb))
851
0
                                            fprintf(stderr, "%d->%d ", (int)found, (int)to);
852
0
                                    }
853
60.4k
                                    found++;
854
60.4k
                                }
855
39.8k
                                i += num_possibilities;
856
39.8k
                            }
857
380k
                            break;
858
0
                        }
859
7.62M
                    }
860
7.62M
                }
861
7.64M
            }
862
4.31M
            if (MVM_UNLIKELY(nfadeb)) fprintf(stderr,"\n");
863
4.31M
        }
864
1.61M
865
1.61M
        /* Move to next character and generation. */
866
1.61M
        offset++;
867
1.61M
        gen++;
868
1.61M
    }
869
713k
    /* strip any literal lengths, leaving only fates */
870
713k
    if (usedlonglit || nfadeb) {
871
35.6k
        if (MVM_UNLIKELY(nfadeb)) fprintf(stderr,"Final\n");
872
85.0k
        for (i = 0; i < total_fates; i++) {
873
49.3k
            if (MVM_UNLIKELY(nfadeb)) fprintf(stderr, "  %08llx\n", (long long unsigned int)fates[i]);
874
49.3k
            fates[i] &= 0xffffff;
875
49.3k
        }
876
35.6k
    }
877
713k
878
713k
    *total_fates_out = total_fates;
879
713k
    return fates;
880
713k
}
881
882
/* Takes an NFA, a target string in and an offset. Runs the NFA and returns
883
 * the order to try the fates in. */
884
322k
MVMObject * MVM_nfa_run_proto(MVMThreadContext *tc, MVMObject *nfa, MVMString *target, MVMint64 offset) {
885
322k
    /* Run the NFA. */
886
322k
    MVMint64  total_fates, i;
887
322k
    MVMint64 *fates = nqp_nfa_run(tc, (MVMNFABody *)OBJECT_BODY(nfa), target, offset, &total_fates);
888
322k
889
322k
    /* Copy results into an integer array. */
890
322k
    MVMObject *fateres = MVM_repr_alloc_init(tc, tc->instance->boot_types.BOOTIntArray);
891
423k
    for (i = 0; i < total_fates; i++)
892
100k
        MVM_repr_bind_pos_i(tc, fateres, i, fates[i]);
893
322k
894
322k
    return fateres;
895
322k
}
896
897
/* Takes an NFA, target string and offset. Runs the NFA, and uses the output
898
 * to update the bstack with backtracking points to try the alternation
899
 * branches in the correct order. The current capture stack is needed for its
900
 * height. */
901
void MVM_nfa_run_alt(MVMThreadContext *tc, MVMObject *nfa, MVMString *target,
902
390k
        MVMint64 offset, MVMObject *bstack, MVMObject *cstack, MVMObject *labels) {
903
390k
    /* Run the NFA. */
904
390k
    MVMint64  total_fates, i;
905
390k
    MVMint64 *fates = nqp_nfa_run(tc, (MVMNFABody *)OBJECT_BODY(nfa), target, offset, &total_fates);
906
390k
907
390k
    /* Push the results onto the bstack. */
908
390k
    MVMint64 caps = cstack && IS_CONCRETE(cstack)
909
16.2k
        ? MVM_repr_elems(tc, cstack)
910
374k
        : 0;
911
643k
    for (i = 0; i < total_fates; i++) {
912
253k
        MVM_repr_push_i(tc, bstack, MVM_repr_at_pos_i(tc, labels, fates[i]));
913
253k
        MVM_repr_push_i(tc, bstack, offset);
914
253k
        MVM_repr_push_i(tc, bstack, 0);
915
253k
        MVM_repr_push_i(tc, bstack, caps);
916
253k
    }
917
390k
}