Coverage Report

Created: 2017-04-15 07:07

/home/travis/build/MoarVM/MoarVM/src/6model/reprs/NFA.c
Line
Count
Source (jump to first uncovered line)
1
#include "moar.h"
2
3
/* This representation's function pointer table. */
4
static const MVMREPROps NFA_this_repr;
5
6
/* Creates a new type object of this representation, and associates it with
7
 * the given HOW. */
8
2
static MVMObject * type_object_for(MVMThreadContext *tc, MVMObject *HOW) {
9
2
    MVMSTable *st = MVM_gc_allocate_stable(tc, &NFA_this_repr, HOW);
10
2
11
2
    MVMROOT(tc, st, {
12
2
        MVMObject *obj = MVM_gc_allocate_type_object(tc, st);
13
2
        MVM_ASSIGN_REF(tc, &(st->header), st->WHAT, obj);
14
2
        st->size = sizeof(MVMNFA);
15
2
    });
16
2
17
2
    return st->WHAT;
18
2
}
19
20
/* Copies the body of one object to another. */
21
0
static void copy_to(MVMThreadContext *tc, MVMSTable *st, void *src, MVMObject *dest_root, void *dest) {
22
0
    MVM_exception_throw_adhoc(tc, "Cannot copy object with representation NFA");
23
0
}
24
25
/* Called by the VM to mark any GCable items. */
26
2.49k
static void gc_mark(MVMThreadContext *tc, MVMSTable *st, void *data, MVMGCWorklist *worklist) {
27
2.49k
    MVMNFABody *lb = (MVMNFABody *)data;
28
2.49k
    MVMint64 i, j;
29
2.49k
30
2.49k
    MVM_gc_worklist_add(tc, worklist, &lb->fates);
31
2.49k
32
80.4k
    for (i = 0; i < lb->num_states; i++) {
33
77.9k
        MVMint64 edges = lb->num_state_edges[i];
34
208k
        for (j = 0; j < edges; j++) {
35
130k
            switch (lb->states[i][j].act) {
36
15.0k
                case MVM_NFA_EDGE_CHARLIST:
37
15.0k
                case MVM_NFA_EDGE_CHARLIST_NEG:
38
15.0k
                    MVM_gc_worklist_add(tc, worklist, &lb->states[i][j].arg.s);
39
130k
            }
40
130k
        }
41
77.9k
    }
42
2.49k
}
43
44
/* Called by the VM in order to free memory associated with this object. */
45
0
static void gc_free(MVMThreadContext *tc, MVMObject *obj) {
46
0
    MVMNFA *nfa = (MVMNFA *)obj;
47
0
    MVMint64 i;
48
0
    for (i = 0; i < nfa->body.num_states; i++)
49
0
        if (nfa->body.num_state_edges[i])
50
0
            MVM_free(nfa->body.states[i]);
51
0
    MVM_free(nfa->body.states);
52
0
    MVM_free(nfa->body.num_state_edges);
53
0
}
54
55
56
static const MVMStorageSpec storage_spec = {
57
    MVM_STORAGE_SPEC_REFERENCE, /* inlineable */
58
    0,                          /* bits */
59
    0,                          /* align */
60
    MVM_STORAGE_SPEC_BP_NONE,   /* boxed_primitive */
61
    0,                          /* can_box */
62
    0,                          /* is_unsigned */
63
};
64
65
66
/* Gets the storage specification for this representation. */
67
0
static const MVMStorageSpec * get_storage_spec(MVMThreadContext *tc, MVMSTable *st) {
68
0
    return &storage_spec;
69
0
}
70
71
/* Serializes the data. */
72
1
static void serialize(MVMThreadContext *tc, MVMSTable *st, void *data, MVMSerializationWriter *writer) {
73
1
    MVMNFABody *body = (MVMNFABody *)data;
74
1
    MVMint64 i, j;
75
1
76
1
    /* Write fates. */
77
1
    MVM_serialization_write_ref(tc, writer, body->fates);
78
1
79
1
    /* Write number of states. */
80
1
    MVM_serialization_write_int(tc, writer, body->num_states);
81
1
82
1
    /* Write state edge list counts. */
83
5
    for (i = 0; i < body->num_states; i++)
84
4
        MVM_serialization_write_int(tc, writer, body->num_state_edges[i]);
85
1
86
1
    /* Write state graph. */
87
5
    for (i = 0; i < body->num_states; i++) {
88
8
        for (j = 0; j < body->num_state_edges[i]; j++) {
89
4
            MVM_serialization_write_int(tc, writer, body->states[i][j].act);
90
4
            MVM_serialization_write_int(tc, writer, body->states[i][j].to);
91
4
            switch (body->states[i][j].act & 0xff) {
92
1
                case MVM_NFA_EDGE_FATE:
93
1
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.i);
94
1
                    break;
95
3
                case MVM_NFA_EDGE_CODEPOINT:
96
3
                case MVM_NFA_EDGE_CODEPOINT_LL:
97
3
                case MVM_NFA_EDGE_CODEPOINT_NEG:
98
3
                case MVM_NFA_EDGE_CODEPOINT_M:
99
3
                case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
100
3
                    MVMGrapheme32 g = body->states[i][j].arg.g;
101
3
                    if (g >= 0) {
102
3
                        /* Non-synthetic. */
103
3
                        MVM_serialization_write_int(tc, writer, g);
104
3
                    }
105
0
                    else {
106
0
                        /* Synthetic. Write the number of codepoints negated,
107
0
                         * and then each of the codepoints. */
108
0
                        MVMNFGSynthetic *si = MVM_nfg_get_synthetic_info(tc, g);
109
0
                        MVMint32 k;
110
0
                        MVM_serialization_write_int(tc, writer, -(si->num_combs + 1));
111
0
                        MVM_serialization_write_int(tc, writer, si->base);
112
0
                        for (k = 0; k < si->num_combs; k++)
113
0
                            MVM_serialization_write_int(tc, writer, si->combs[k]);
114
0
                    }
115
3
                    break;
116
3
                }
117
0
                case MVM_NFA_EDGE_CHARCLASS:
118
0
                case MVM_NFA_EDGE_CHARCLASS_NEG:
119
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.i);
120
0
                    break;
121
0
                case MVM_NFA_EDGE_CHARLIST:
122
0
                case MVM_NFA_EDGE_CHARLIST_NEG:
123
0
                    MVM_serialization_write_str(tc, writer, body->states[i][j].arg.s);
124
0
                    break;
125
0
                case MVM_NFA_EDGE_CODEPOINT_I:
126
0
                case MVM_NFA_EDGE_CODEPOINT_I_LL:
127
0
                case MVM_NFA_EDGE_CODEPOINT_I_NEG:
128
0
                case MVM_NFA_EDGE_CODEPOINT_IM:
129
0
                case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
130
0
                case MVM_NFA_EDGE_CHARRANGE:
131
0
                case MVM_NFA_EDGE_CHARRANGE_NEG:
132
0
                case MVM_NFA_EDGE_CHARRANGE_M:
133
0
                case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
134
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.uclc.lc);
135
0
                    MVM_serialization_write_int(tc, writer, body->states[i][j].arg.uclc.uc);
136
0
                    break;
137
0
                }
138
4
            }
139
4
        }
140
4
    }
141
1
}
142
143
/* Deserializes the data. */
144
1
static void deserialize(MVMThreadContext *tc, MVMSTable *st, MVMObject *root, void *data, MVMSerializationReader *reader) {
145
1
    MVMNFABody *body = (MVMNFABody *)data;
146
1
    MVMint64 i, j;
147
1
148
1
    /* Read fates. */
149
1
    body->fates = MVM_serialization_read_ref(tc, reader);
150
1
151
1
    /* Read number of states. */
152
1
    body->num_states = MVM_serialization_read_int(tc, reader);
153
1
154
1
    if (body->num_states > 0) {
155
1
        /* Read state edge list counts. */
156
1
        body->num_state_edges = MVM_malloc(body->num_states * sizeof(MVMint64));
157
5
        for (i = 0; i < body->num_states; i++)
158
4
            body->num_state_edges[i] = MVM_serialization_read_int(tc, reader);
159
1
160
1
        /* Read state graph. */
161
1
        body->states = MVM_malloc(body->num_states * sizeof(MVMNFAStateInfo *));
162
5
        for (i = 0; i < body->num_states; i++) {
163
4
            MVMint64 edges = body->num_state_edges[i];
164
4
            if (edges > 0)
165
4
                body->states[i] = MVM_malloc(edges * sizeof(MVMNFAStateInfo));
166
8
            for (j = 0; j < edges; j++) {
167
4
                body->states[i][j].act = MVM_serialization_read_int(tc, reader);
168
4
                body->states[i][j].to = MVM_serialization_read_int(tc, reader);
169
4
                switch (body->states[i][j].act & 0xff) {
170
1
                    case MVM_NFA_EDGE_FATE:
171
1
                        body->states[i][j].arg.i = MVM_serialization_read_int(tc, reader);
172
1
                        break;
173
3
                    case MVM_NFA_EDGE_CODEPOINT:
174
3
                    case MVM_NFA_EDGE_CODEPOINT_LL:
175
3
                    case MVM_NFA_EDGE_CODEPOINT_NEG:
176
3
                    case MVM_NFA_EDGE_CODEPOINT_M:
177
3
                    case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
178
3
                        MVMint64 cp_or_synth_count = MVM_serialization_read_int(tc, reader);
179
3
                        if (cp_or_synth_count >= 0) {
180
3
                            body->states[i][j].arg.g = (MVMGrapheme32)cp_or_synth_count;
181
3
                        }
182
0
                        else {
183
0
                            MVMint32 num_codes = -cp_or_synth_count;
184
0
                            MVMCodepoint *codes = MVM_malloc(num_codes * sizeof(MVMCodepoint));
185
0
                            MVMint32 k;
186
0
                            for (k = 0; k < num_codes; k++)
187
0
                                codes[k] = (MVMCodepoint)MVM_serialization_read_int(tc, reader);
188
0
                            body->states[i][j].arg.g = MVM_nfg_codes_to_grapheme(tc, codes, num_codes);
189
0
                            MVM_free(codes);
190
0
                        }
191
3
                        break;
192
3
                    }
193
0
                    case MVM_NFA_EDGE_CHARCLASS:
194
0
                    case MVM_NFA_EDGE_CHARCLASS_NEG:
195
0
                        body->states[i][j].arg.i = MVM_serialization_read_int(tc, reader);
196
0
                        break;
197
0
                    case MVM_NFA_EDGE_CHARLIST:
198
0
                    case MVM_NFA_EDGE_CHARLIST_NEG:
199
0
                        MVM_ASSIGN_REF(tc, &(root->header), body->states[i][j].arg.s, MVM_serialization_read_str(tc, reader));
200
0
                        break;
201
0
                    case MVM_NFA_EDGE_CODEPOINT_I:
202
0
                    case MVM_NFA_EDGE_CODEPOINT_I_LL:
203
0
                    case MVM_NFA_EDGE_CODEPOINT_I_NEG:
204
0
                    case MVM_NFA_EDGE_CODEPOINT_IM:
205
0
                    case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
206
0
                    case MVM_NFA_EDGE_CHARRANGE:
207
0
                    case MVM_NFA_EDGE_CHARRANGE_NEG:
208
0
                    case MVM_NFA_EDGE_CHARRANGE_M:
209
0
                    case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
210
0
                        body->states[i][j].arg.uclc.lc = MVM_serialization_read_int(tc, reader);
211
0
                        body->states[i][j].arg.uclc.uc = MVM_serialization_read_int(tc, reader);
212
0
                        break;
213
0
                    }
214
4
                }
215
4
            }
216
4
        }
217
1
    }
218
1
}
219
220
/* Compose the representation. */
221
2
static void compose(MVMThreadContext *tc, MVMSTable *st, MVMObject *info) {
222
2
    /* Nothing to do for this REPR. */
223
2
}
224
225
/* Set the size of the STable. */
226
130
static void deserialize_stable_size(MVMThreadContext *tc, MVMSTable *st, MVMSerializationReader *reader) {
227
130
    st->size = sizeof(MVMNFA);
228
130
}
229
230
/* Calculates the non-GC-managed memory we hold on to. */
231
882
static MVMuint64 unmanaged_size(MVMThreadContext *tc, MVMSTable *st, void *data) {
232
882
    MVMNFABody *body = (MVMNFABody *)data;
233
882
    MVMuint64 total;
234
882
    MVMint64 i;
235
882
236
882
    total = body->num_states * sizeof(MVMint64); /* for num_state_edges */
237
882
    total += body->num_states * sizeof(MVMNFAStateInfo *); /* for states level 1 */
238
28.2k
    for (i = 0; i < body->num_states; i++)
239
27.3k
        total += body->num_state_edges[i] * sizeof(MVMNFAStateInfo);
240
882
241
882
    return total;
242
882
}
243
244
/* Initializes the representation. */
245
130
const MVMREPROps * MVMNFA_initialize(MVMThreadContext *tc) {
246
130
    return &NFA_this_repr;
247
130
}
248
249
static const MVMREPROps NFA_this_repr = {
250
    type_object_for,
251
    MVM_gc_allocate_object,
252
    NULL, /* initialize */
253
    copy_to,
254
    MVM_REPR_DEFAULT_ATTR_FUNCS,
255
    MVM_REPR_DEFAULT_BOX_FUNCS,
256
    MVM_REPR_DEFAULT_POS_FUNCS,
257
    MVM_REPR_DEFAULT_ASS_FUNCS,
258
    MVM_REPR_DEFAULT_ELEMS,
259
    get_storage_spec,
260
    NULL, /* change_type */
261
    serialize,
262
    deserialize,
263
    NULL, /* serialize_repr_data */
264
    NULL, /* deserialize_repr_data */
265
    deserialize_stable_size,
266
    gc_mark,
267
    gc_free,
268
    NULL, /* gc_cleanup */
269
    NULL, /* gc_mark_repr_data */
270
    NULL, /* gc_free_repr_data */
271
    compose,
272
    NULL, /* spesh */
273
    "NFA", /* name */
274
    MVM_REPR_ID_NFA,
275
    unmanaged_size,
276
    NULL, /* describe_refs */
277
};
278
279
/* We may be provided a grapheme as a codepoint for non-synthetics, or as a
280
 * 1-char string for synthetics. */
281
111k
static MVMGrapheme32 get_grapheme(MVMThreadContext *tc, MVMObject *obj) {
282
111k
    /* Handle null and non-concrete case. */
283
111k
    if (MVM_is_null(tc, obj) || !IS_CONCRETE(obj)) {
284
0
        MVM_exception_throw_adhoc(tc,
285
0
            "NFA must be provided with a concrete string or integer for graphemes");
286
0
    }
287
111k
288
111k
    /* Otherwise, guess something appropriate. */
289
111k
    else {
290
111k
        const MVMStorageSpec *ss = REPR(obj)->get_storage_spec(tc, STABLE(obj));
291
111k
        if (ss->can_box & MVM_STORAGE_SPEC_CAN_BOX_INT)
292
111k
            return REPR(obj)->box_funcs.get_int(tc, STABLE(obj), obj, OBJECT_BODY(obj));
293
0
        else if (ss->can_box & MVM_STORAGE_SPEC_CAN_BOX_STR)
294
0
            return MVM_string_get_grapheme_at(tc,
295
0
                REPR(obj)->box_funcs.get_str(tc, STABLE(obj), obj, OBJECT_BODY(obj)),
296
0
                0);
297
0
        else
298
0
            MVM_exception_throw_adhoc(tc,
299
0
                "NFA must be provided with a string or integer for graphemes");
300
111k
    }
301
111k
}
302
303
4.56k
MVMObject * MVM_nfa_from_statelist(MVMThreadContext *tc, MVMObject *states, MVMObject *nfa_type) {
304
4.56k
    MVMObject  *nfa_obj;
305
4.56k
    MVMNFABody *nfa;
306
4.56k
    MVMint64    i, j, num_states;
307
4.56k
308
4.56k
    MVMROOT(tc, states, {
309
4.56k
    MVMROOT(tc, nfa_type, {
310
4.56k
        /* Create NFA object. */
311
4.56k
        nfa_obj = MVM_repr_alloc_init(tc, nfa_type);
312
4.56k
        nfa = (MVMNFABody *)OBJECT_BODY(nfa_obj);
313
4.56k
314
4.56k
        /* The first state entry is the fates list. */
315
4.56k
        nfa->fates = MVM_repr_at_pos_o(tc, states, 0);
316
4.56k
317
4.56k
        /* Go over the rest and convert to the NFA. */
318
4.56k
        num_states = MVM_repr_elems(tc, states) - 1;
319
4.56k
        nfa->num_states = num_states;
320
4.56k
        if (num_states > 0) {
321
4.56k
            nfa->num_state_edges = MVM_calloc(num_states, sizeof(MVMint64));
322
4.56k
            nfa->states = MVM_calloc(num_states, sizeof(MVMNFAStateInfo *));
323
4.56k
        }
324
4.56k
        for (i = 0; i < num_states; i++) {
325
4.56k
            MVMObject *edge_info = MVM_repr_at_pos_o(tc, states, i + 1);
326
4.56k
            MVMint64   elems     = MVM_repr_elems(tc, edge_info);
327
4.56k
            MVMint64   edges     = elems / 3;
328
4.56k
            MVMint64   cur_edge  = 0;
329
4.56k
330
4.56k
            nfa->num_state_edges[i] = edges;
331
4.56k
            if (edges > 0)
332
4.56k
                nfa->states[i] = MVM_malloc(edges * sizeof(MVMNFAStateInfo));
333
4.56k
334
4.56k
            for (j = 0; j < elems; j += 3) {
335
4.56k
                MVMint64 act = MVM_coerce_simple_intify(tc,
336
4.56k
                    MVM_repr_at_pos_o(tc, edge_info, j));
337
4.56k
                MVMint64 to  = MVM_coerce_simple_intify(tc,
338
4.56k
                    MVM_repr_at_pos_o(tc, edge_info, j + 2));
339
4.56k
                if (to <= 0 && act != MVM_NFA_EDGE_FATE)
340
4.56k
                    MVM_exception_throw_adhoc(tc, "Invalid to edge %"PRId64" in NFA statelist", to);
341
4.56k
342
4.56k
                nfa->states[i][cur_edge].act = act;
343
4.56k
                nfa->states[i][cur_edge].to = to;
344
4.56k
345
4.56k
                switch (act & 0xff) {
346
4.56k
                case MVM_NFA_EDGE_FATE:
347
4.56k
                    nfa->states[i][cur_edge].arg.i = MVM_coerce_simple_intify(tc,
348
4.56k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
349
4.56k
                    break;
350
4.56k
                case MVM_NFA_EDGE_CODEPOINT:
351
4.56k
                case MVM_NFA_EDGE_CODEPOINT_LL:
352
4.56k
                case MVM_NFA_EDGE_CODEPOINT_NEG:
353
4.56k
                case MVM_NFA_EDGE_CODEPOINT_M:
354
4.56k
                case MVM_NFA_EDGE_CODEPOINT_M_NEG:
355
4.56k
                    nfa->states[i][cur_edge].arg.g = get_grapheme(tc,
356
4.56k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
357
4.56k
                    break;
358
4.56k
                case MVM_NFA_EDGE_CHARCLASS:
359
4.56k
                case MVM_NFA_EDGE_CHARCLASS_NEG:
360
4.56k
                    nfa->states[i][cur_edge].arg.i = MVM_coerce_simple_intify(tc,
361
4.56k
                        MVM_repr_at_pos_o(tc, edge_info, j + 1));
362
4.56k
                    break;
363
4.56k
                case MVM_NFA_EDGE_CHARLIST:
364
4.56k
                case MVM_NFA_EDGE_CHARLIST_NEG:
365
4.56k
                    MVM_ASSIGN_REF(tc, &(nfa_obj->header),
366
4.56k
                        nfa->states[i][cur_edge].arg.s,
367
4.56k
                        MVM_repr_get_str(tc, MVM_repr_at_pos_o(tc, edge_info, j + 1)));
368
4.56k
                    break;
369
4.56k
                case MVM_NFA_EDGE_CODEPOINT_I:
370
4.56k
                case MVM_NFA_EDGE_CODEPOINT_I_LL:
371
4.56k
                case MVM_NFA_EDGE_CODEPOINT_I_NEG:
372
4.56k
                case MVM_NFA_EDGE_CODEPOINT_IM:
373
4.56k
                case MVM_NFA_EDGE_CODEPOINT_IM_NEG:
374
4.56k
                /* That is not about uppercase/lowercase here, but lower and upper bounds
375
4.56k
                   of our range. */
376
4.56k
                case MVM_NFA_EDGE_CHARRANGE:
377
4.56k
                case MVM_NFA_EDGE_CHARRANGE_NEG:
378
4.56k
                case MVM_NFA_EDGE_CHARRANGE_M:
379
4.56k
                case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
380
4.56k
                    MVMObject *arg = MVM_repr_at_pos_o(tc, edge_info, j + 1);
381
4.56k
                    nfa->states[i][cur_edge].arg.uclc.lc = MVM_coerce_simple_intify(tc,
382
4.56k
                        MVM_repr_at_pos_o(tc, arg, 0));
383
4.56k
                    nfa->states[i][cur_edge].arg.uclc.uc = MVM_coerce_simple_intify(tc,
384
4.56k
                        MVM_repr_at_pos_o(tc, arg, 1));
385
4.56k
                    break;
386
4.56k
                }
387
4.56k
                }
388
4.56k
389
4.56k
                cur_edge++;
390
4.56k
            }
391
4.56k
        }
392
4.56k
    });
393
4.56k
    });
394
4.56k
395
4.56k
    return nfa_obj;
396
4.56k
}
397
398
/* Does a run of the NFA. Produces a list of integers indicating the
399
 * chosen ordering. */
400
577k
static MVMint64 * nqp_nfa_run(MVMThreadContext *tc, MVMNFABody *nfa, MVMString *target, MVMint64 offset, MVMint64 *total_fates_out) {
401
577k
    MVMint64  eos     = MVM_string_graphs(tc, target);
402
577k
    MVMint64  gen     = 1;
403
577k
    MVMint64  numcur  = 0;
404
577k
    MVMint64  numnext = 0;
405
577k
    MVMint64 *done, *fates, *curst, *nextst, *longlit;
406
577k
    MVMint64  i, fate_arr_len, num_states, total_fates, prev_fates, usedlonglit;
407
577k
    MVMint64  orig_offset = offset;
408
577k
    int nfadeb = tc->instance->nfa_debug_enabled;
409
577k
410
577k
    /* Obtain or (re)allocate "done states", "current states" and "next
411
577k
     * states" arrays. */
412
577k
    num_states = nfa->num_states;
413
577k
    if (tc->nfa_alloc_states < num_states) {
414
260
        size_t alloc   = (num_states + 1) * sizeof(MVMint64);
415
260
        tc->nfa_done   = (MVMint64 *)MVM_realloc(tc->nfa_done, alloc);
416
260
        tc->nfa_curst  = (MVMint64 *)MVM_realloc(tc->nfa_curst, alloc);
417
260
        tc->nfa_nextst = (MVMint64 *)MVM_realloc(tc->nfa_nextst, alloc);
418
260
        tc->nfa_alloc_states = num_states;
419
260
    }
420
577k
    done   = tc->nfa_done;
421
577k
    curst  = tc->nfa_curst;
422
577k
    nextst = tc->nfa_nextst;
423
577k
    memset(done, 0, (num_states + 1) * sizeof(MVMint64));
424
577k
425
577k
    /* Allocate fates array. */
426
577k
    fate_arr_len = 1 + MVM_repr_elems(tc, nfa->fates);
427
577k
    if (tc->nfa_fates_len < fate_arr_len) {
428
531
        tc->nfa_fates     = (MVMint64 *)MVM_realloc(tc->nfa_fates, sizeof(MVMint64) * fate_arr_len);
429
531
        tc->nfa_fates_len = fate_arr_len;
430
531
    }
431
577k
    fates = tc->nfa_fates;
432
577k
    total_fates = 0;
433
577k
    if (nfadeb) fprintf(stderr,"======================================\nStarting with %d fates in %d states\n", (int)fate_arr_len, (int)num_states) ;
434
577k
435
577k
    /* longlit will be updated on a fate whenever NFA passes through final char of a literal. */
436
577k
    /* These edges are specially marked to indicate which fate they influence the fate of. */
437
577k
    if (tc->nfa_longlit_len < fate_arr_len) {
438
531
        tc->nfa_longlit = (MVMint64 *)MVM_realloc(tc->nfa_longlit, sizeof(MVMint64) * fate_arr_len);
439
531
        tc->nfa_longlit_len  = fate_arr_len;
440
531
    }
441
577k
    longlit = tc->nfa_longlit;
442
577k
    usedlonglit = 0;
443
577k
444
577k
    nextst[numnext++] = 1;
445
1.70M
    while (numnext && offset <= eos) {
446
1.12M
        /* Swap next and current */
447
1.12M
        MVMint64 *temp = curst;
448
1.12M
        curst   = nextst;
449
1.12M
        nextst  = temp;
450
1.12M
        numcur  = numnext;
451
1.12M
        numnext = 0;
452
1.12M
453
1.12M
        /* Save how many fates we have before this position is considered. */
454
1.12M
        prev_fates = total_fates;
455
1.12M
456
1.12M
        if (nfadeb) {
457
0
            if (offset < eos) {
458
0
                MVMGrapheme32 cp = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
459
0
                fprintf(stderr,"%c with %ds target %lx offset %"PRId64"\n",cp,(int)numcur, (long)target, offset);
460
0
            }
461
0
            else {
462
0
                fprintf(stderr,"EOS with %ds\n",(int)numcur);
463
0
            }
464
0
        }
465
4.41M
        while (numcur) {
466
3.29M
            MVMNFAStateInfo *edge_info;
467
3.29M
            MVMint64         edge_info_elems;
468
3.29M
469
3.29M
            MVMint64 st = curst[--numcur];
470
3.29M
            if (st <= num_states) {
471
3.29M
                if (done[st] == gen)
472
5.63k
                    continue;
473
3.28M
                done[st] = gen;
474
3.28M
            }
475
3.29M
476
3.28M
            edge_info = nfa->states[st - 1];
477
3.28M
            edge_info_elems = nfa->num_state_edges[st - 1];
478
3.28M
            if (nfadeb)
479
0
                fprintf(stderr,"\t%d\t%d\t",(int)st, (int)edge_info_elems);
480
14.5M
            for (i = 0; i < edge_info_elems; i++) {
481
11.3M
                MVMint64 act = edge_info[i].act;
482
11.3M
                MVMint64 to  = edge_info[i].to;
483
11.3M
484
11.3M
                /* All the special cases are under one test. */
485
11.3M
                if (act <= MVM_NFA_EDGE_EPSILON) {
486
3.58M
                    if (act < 0) {
487
1.15M
                        /* Negative indicates a fate is encoded in the act of the codepoint edge. */
488
1.15M
                        /* These will redispatch to one of the _LL cases below */
489
1.15M
                        act &= 0xff;
490
1.15M
                    }
491
2.42M
                    else if (act == MVM_NFA_EDGE_FATE) {
492
653k
                        /* Crossed a fate edge. Check if we already saw this fate, and
493
653k
                         * if so remove the entry so we can re-add at the new token length. */
494
653k
                        MVMint64 arg = edge_info[i].arg.i;
495
653k
                        MVMint64 j;
496
653k
                        MVMint64 found_fate = 0;
497
653k
                        if (nfadeb)
498
0
                            fprintf(stderr, "fate(%016llx) ", (long long unsigned int)arg);
499
1.18M
                        for (j = 0; j < total_fates; j++) {
500
530k
                            if (found_fate)
501
56.8k
                                fates[j - found_fate] = fates[j];
502
530k
                            if ((fates[j] & 0xffffff) == arg) {
503
358k
                                found_fate++;
504
358k
                                if (j < prev_fates)
505
354k
                                    prev_fates--;
506
358k
                            }
507
530k
                        }
508
653k
                        total_fates -= found_fate;
509
653k
                        if (arg < usedlonglit)
510
117k
                            arg -= longlit[arg] << 24;
511
653k
                        if (++total_fates > fate_arr_len) {
512
0
                            /* should never happen if nfa->fates is correct and dedup above works right */
513
0
                            fprintf(stderr, "oops adding %016llx to\n", (long long unsigned int)arg);
514
0
                            for (j = 0; j < total_fates - 1; j++) {
515
0
                                fprintf(stderr, "  %016llx\n", (long long unsigned int)fates[j]);
516
0
                            }
517
0
                            fate_arr_len      = total_fates + 10;
518
0
                            tc->nfa_fates     = (MVMint64 *)MVM_realloc(tc->nfa_fates,
519
0
                                sizeof(MVMint64) * fate_arr_len);
520
0
                            tc->nfa_fates_len = fate_arr_len;
521
0
                            fates             = tc->nfa_fates;
522
0
                        }
523
653k
                        /* a small insertion sort */
524
653k
                        j = total_fates - 1;
525
695k
                        while (--j >= prev_fates && fates[j] < arg) {
526
42.4k
                            fates[j + 1] = fates[j];
527
42.4k
                        }
528
653k
                        fates[++j] = arg;
529
653k
                        continue;
530
653k
                    }
531
1.77M
                    else if (act == MVM_NFA_EDGE_EPSILON && to <= num_states && done[to] != gen) {
532
1.76M
                        if (to)
533
1.76M
                            curst[numcur++] = to;
534
0
                        else if (nfadeb)  /* XXX should turn into a "can't happen" after rebootstrap */
535
0
                            fprintf(stderr, "  oops, ignoring epsilon to 0\n");
536
1.76M
                        continue;
537
1.76M
                    }
538
3.58M
                }
539
11.3M
540
8.89M
                if (offset >= eos) {
541
23.2k
                    /* Can't match, so drop state. */
542
23.2k
                    continue;
543
23.2k
                }
544
8.86M
                else {
545
8.86M
                    switch (act) {
546
1.15M
                        case MVM_NFA_EDGE_CODEPOINT_LL: {
547
1.15M
                            MVMGrapheme32 arg = edge_info[i].arg.g;
548
1.15M
                            if (MVM_string_get_grapheme_at_nocheck(tc, target, offset) == arg) {
549
55.3k
                                MVMint64 fate = (edge_info[i].act >> 8) & 0xfffff;
550
55.3k
                                nextst[numnext++] = to;
551
314k
                                while (usedlonglit <= fate)
552
258k
                                    longlit[usedlonglit++] = 0;
553
55.3k
                                longlit[fate] = offset - orig_offset + 1;
554
55.3k
                                if (nfadeb)
555
0
                                    fprintf(stderr, "%d->%d ", (int)i, (int)to);
556
55.3k
                            }
557
1.15M
                            continue;
558
1.15M
                        }
559
5.04M
                        case MVM_NFA_EDGE_CODEPOINT: {
560
5.04M
                            MVMGrapheme32 arg = edge_info[i].arg.g;
561
5.04M
                            if (MVM_string_get_grapheme_at_nocheck(tc, target, offset) == arg) {
562
139k
                                nextst[numnext++] = to;
563
139k
                                if (nfadeb)
564
0
                                    fprintf(stderr, "%d->%d ", (int)i, (int)to);
565
139k
                            }
566
5.04M
                            continue;
567
1.15M
                        }
568
0
                        case MVM_NFA_EDGE_CODEPOINT_NEG: {
569
0
                            MVMGrapheme32 arg = edge_info[i].arg.g;
570
0
                            if (MVM_string_get_grapheme_at_nocheck(tc, target, offset) != arg)
571
0
                                nextst[numnext++] = to;
572
0
                            continue;
573
1.15M
                        }
574
864k
                        case MVM_NFA_EDGE_CHARCLASS: {
575
864k
                            MVMint64 arg = edge_info[i].arg.i;
576
864k
                            if (MVM_string_is_cclass(tc, arg, target, offset))
577
435k
                                nextst[numnext++] = to;
578
864k
                            continue;
579
1.15M
                        }
580
89.3k
                        case MVM_NFA_EDGE_CHARCLASS_NEG: {
581
89.3k
                            MVMint64 arg = edge_info[i].arg.i;
582
89.3k
                            if (!MVM_string_is_cclass(tc, arg, target, offset))
583
85.9k
                                nextst[numnext++] = to;
584
89.3k
                            continue;
585
1.15M
                        }
586
1.70M
                        case MVM_NFA_EDGE_CHARLIST: {
587
1.70M
                            MVMString *arg    = edge_info[i].arg.s;
588
1.70M
                            MVMGrapheme32 cp = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
589
1.70M
                            if (MVM_string_index_of_grapheme(tc, arg, cp) >= 0)
590
227k
                                nextst[numnext++] = to;
591
1.70M
                            continue;
592
1.15M
                        }
593
2.60k
                        case MVM_NFA_EDGE_CHARLIST_NEG: {
594
2.60k
                            MVMString *arg    = edge_info[i].arg.s;
595
2.60k
                            MVMGrapheme32 cp = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
596
2.60k
                            if (MVM_string_index_of_grapheme(tc, arg, cp) < 0)
597
2.06k
                                nextst[numnext++] = to;
598
2.60k
                            continue;
599
1.15M
                        }
600
0
                        case MVM_NFA_EDGE_CODEPOINT_I_LL: {
601
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
602
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
603
0
                            MVMGrapheme32 ord    = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
604
0
                            if (ord == lc_arg || ord == uc_arg) {
605
0
                                MVMint64 fate = (edge_info[i].act >> 8) & 0xfffff;
606
0
                                nextst[numnext++] = to;
607
0
                                while (usedlonglit <= fate)
608
0
                                    longlit[usedlonglit++] = 0;
609
0
                                longlit[fate] = offset - orig_offset + 1;
610
0
                            }
611
0
                            continue;
612
1.15M
                        }
613
0
                        case MVM_NFA_EDGE_CODEPOINT_I: {
614
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
615
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
616
0
                            MVMGrapheme32 ord    = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
617
0
                            if (ord == lc_arg || ord == uc_arg)
618
0
                                nextst[numnext++] = to;
619
0
                            continue;
620
1.15M
                        }
621
0
                        case MVM_NFA_EDGE_CODEPOINT_I_NEG: {
622
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
623
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
624
0
                            MVMGrapheme32 ord    = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
625
0
                            if (ord != lc_arg && ord != uc_arg)
626
0
                                nextst[numnext++] = to;
627
0
                            continue;
628
1.15M
                        }
629
6.63k
                        case MVM_NFA_EDGE_CHARRANGE: {
630
6.63k
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
631
6.63k
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
632
6.63k
                            MVMGrapheme32 ord    = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
633
6.63k
                            if (ord >= lc_arg && ord <= uc_arg)
634
977
                                nextst[numnext++] = to;
635
6.63k
                            continue;
636
1.15M
                        }
637
0
                        case MVM_NFA_EDGE_CHARRANGE_NEG: {
638
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
639
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
640
0
                            MVMGrapheme32 ord    = MVM_string_get_grapheme_at_nocheck(tc, target, offset);
641
0
                            if (ord < lc_arg || ord > uc_arg)
642
0
                                nextst[numnext++] = to;
643
0
                            continue;
644
1.15M
                        }
645
0
                        case MVM_NFA_EDGE_SUBRULE:
646
0
                            if (nfadeb)
647
0
                                fprintf(stderr, "IGNORING SUBRULE\n");
648
0
                            continue;
649
0
                        case MVM_NFA_EDGE_CODEPOINT_M:
650
0
                        case MVM_NFA_EDGE_CODEPOINT_M_NEG: {
651
0
                            MVMNormalizer norm;
652
0
                            MVMint32 ready;
653
0
                            MVMGrapheme32 ga = edge_info[i].arg.g;
654
0
                            MVMGrapheme32 gb = MVM_string_ord_basechar_at(tc, target, offset);
655
0
656
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
657
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, ga, &ga);
658
0
                            MVM_unicode_normalizer_eof(tc, &norm);
659
0
                            if (!ready)
660
0
                                ga = MVM_unicode_normalizer_get_grapheme(tc, &norm);
661
0
662
0
                            if (((act == MVM_NFA_EDGE_CODEPOINT_M)     && (ga == gb))
663
0
                             || ((act == MVM_NFA_EDGE_CODEPOINT_M_NEG) && (ga != gb)))
664
0
                                nextst[numnext++] = to;
665
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
666
0
                            continue;
667
0
                        }
668
0
                        case MVM_NFA_EDGE_CODEPOINT_IM:
669
0
                        case MVM_NFA_EDGE_CODEPOINT_IM_NEG: {
670
0
                            MVMNormalizer norm;
671
0
                            MVMint32 ready;
672
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
673
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
674
0
                            MVMGrapheme32 ord    = MVM_string_ord_basechar_at(tc, target, offset);
675
0
676
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
677
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, uc_arg, &uc_arg);
678
0
                            MVM_unicode_normalizer_eof(tc, &norm);
679
0
                            if (!ready)
680
0
                                uc_arg = MVM_unicode_normalizer_get_grapheme(tc, &norm);
681
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
682
0
683
0
                            MVM_unicode_normalizer_init(tc, &norm, MVM_NORMALIZE_NFD);
684
0
                            ready = MVM_unicode_normalizer_process_codepoint_to_grapheme(tc, &norm, lc_arg, &lc_arg);
685
0
                            MVM_unicode_normalizer_eof(tc, &norm);
686
0
                            if (!ready)
687
0
                                lc_arg = MVM_unicode_normalizer_get_grapheme(tc, &norm);
688
0
689
0
                            if (((act == MVM_NFA_EDGE_CODEPOINT_IM)     && (ord == lc_arg || ord == uc_arg))
690
0
                             || ((act == MVM_NFA_EDGE_CODEPOINT_IM_NEG) && (ord != lc_arg && ord != uc_arg)))
691
0
                                nextst[numnext++] = to;
692
0
                            MVM_unicode_normalizer_cleanup(tc, &norm);
693
0
                            continue;
694
0
                        }
695
0
                        case MVM_NFA_EDGE_CHARRANGE_M: {
696
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
697
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
698
0
                            MVMGrapheme32 ord    = MVM_string_ord_basechar_at(tc, target, offset);
699
0
                            if (ord >= lc_arg && ord <= uc_arg)
700
0
                                nextst[numnext++] = to;
701
0
                            continue;
702
0
                        }
703
0
                        case MVM_NFA_EDGE_CHARRANGE_M_NEG: {
704
0
                            MVMGrapheme32 uc_arg = edge_info[i].arg.uclc.uc;
705
0
                            MVMGrapheme32 lc_arg = edge_info[i].arg.uclc.lc;
706
0
                            MVMGrapheme32 ord    = MVM_string_ord_basechar_at(tc, target, offset);
707
0
                            if (ord < lc_arg || ord > uc_arg)
708
0
                                nextst[numnext++] = to;
709
0
                            continue;
710
0
                        }
711
8.86M
                    }
712
8.86M
                }
713
8.89M
            }
714
3.28M
            if (nfadeb) fprintf(stderr,"\n");
715
3.28M
        }
716
1.12M
717
1.12M
        /* Move to next character and generation. */
718
1.12M
        offset++;
719
1.12M
        gen++;
720
1.12M
    }
721
577k
    /* strip any literal lengths, leaving only fates */
722
577k
    if (usedlonglit || nfadeb) {
723
29.2k
        if (nfadeb) fprintf(stderr,"Final\n");
724
69.6k
        for (i = 0; i < total_fates; i++) {
725
40.4k
            if (nfadeb) fprintf(stderr, "  %08llx\n", (long long unsigned int)fates[i]);
726
40.4k
            fates[i] &= 0xffffff;
727
40.4k
        }
728
29.2k
    }
729
577k
730
577k
    *total_fates_out = total_fates;
731
577k
    return fates;
732
577k
}
733
734
/* Takes an NFA, a target string in and an offset. Runs the NFA and returns
735
 * the order to try the fates in. */
736
255k
MVMObject * MVM_nfa_run_proto(MVMThreadContext *tc, MVMObject *nfa, MVMString *target, MVMint64 offset) {
737
255k
    /* Run the NFA. */
738
255k
    MVMint64  total_fates, i;
739
255k
    MVMint64 *fates = nqp_nfa_run(tc, (MVMNFABody *)OBJECT_BODY(nfa), target, offset, &total_fates);
740
255k
741
255k
    /* Copy results into an integer array. */
742
255k
    MVMObject *fateres = MVM_repr_alloc_init(tc, tc->instance->boot_types.BOOTIntArray);
743
337k
    for (i = 0; i < total_fates; i++)
744
81.7k
        MVM_repr_bind_pos_i(tc, fateres, i, fates[i]);
745
255k
746
255k
    return fateres;
747
255k
}
748
749
/* Takes an NFA, target string and offset. Runs the NFA, and uses the output
750
 * to update the bstack with backtracking points to try the alternation
751
 * branches in the correct order. The current capture stack is needed for its
752
 * height. */
753
void MVM_nfa_run_alt(MVMThreadContext *tc, MVMObject *nfa, MVMString *target,
754
321k
        MVMint64 offset, MVMObject *bstack, MVMObject *cstack, MVMObject *labels) {
755
321k
    /* Run the NFA. */
756
321k
    MVMint64  total_fates, i;
757
321k
    MVMint64 *fates = nqp_nfa_run(tc, (MVMNFABody *)OBJECT_BODY(nfa), target, offset, &total_fates);
758
321k
759
321k
    /* Push the results onto the bstack. */
760
321k
    MVMint64 caps = cstack && IS_CONCRETE(cstack)
761
13.3k
        ? MVM_repr_elems(tc, cstack)
762
308k
        : 0;
763
533k
    for (i = 0; i < total_fates; i++) {
764
212k
        MVM_repr_push_i(tc, bstack, MVM_repr_at_pos_i(tc, labels, fates[i]));
765
212k
        MVM_repr_push_i(tc, bstack, offset);
766
212k
        MVM_repr_push_i(tc, bstack, 0);
767
212k
        MVM_repr_push_i(tc, bstack, caps);
768
212k
    }
769
321k
}