Coverage Report

Created: 2018-07-03 15:31

/home/travis/build/MoarVM/MoarVM/src/strings/decode_stream.h
Line
Count
Source (jump to first uncovered line)
1
/* Represents a bytes => chars decoding stream. Input is held as a linked
 * list of byte buffers and decoded output as a linked list of char buffers. */
2
struct MVMDecodeStream {
3
    /* Head and tail of the input byte buffers. */
4
    MVMDecodeStreamBytes *bytes_head;
5
    MVMDecodeStreamBytes *bytes_tail;
6
7
    /* Head and tail of the output char buffers. */
8
    MVMDecodeStreamChars *chars_head;
9
    MVMDecodeStreamChars *chars_tail;
10
11
    /* Often, when reading lines or chunks, we'll fill up one char buffer
12
     * and then immediately take it. That results in a lot of allocating
13
     * and freeing of MVMDecodeStreamChars structures. Keeping a free one
14
     * avoids this. (There's not really a common steady state where we
15
     * have multiple free ones, so a free list isn't worth the extra work.) */
16
    MVMDecodeStreamChars *chars_reuse;
17
18
    /* The absolute byte position (for tell). */
19
    MVMint64 abs_byte_pos;
20
21
    /* How far we've eaten into the current head bytes buffer. */
22
    MVMint32 bytes_head_pos;
23
24
    /* How far we've eaten into the current head char buffer. */
25
    MVMint32 chars_head_pos;
26
27
    /* The encoding we're using. */
28
    MVMint32 encoding;
29
30
    /* Suggestion for decoders of how many bytes to guess at when allocating
31
     * decoded result buffers. */
32
    MVMint32 result_size_guess;
33
34
    /* Normalizer. */
35
    MVMNormalizer norm;
36
37
    /* Optional place for the decoder to keep any extra state it needs between
38
     * decode calls. Will be freed when the decode stream is destroyed. */
39
    void *decoder_state;
40
41
    /* Stores a replacement which is used upon encountering undecodable characters.
42
     * Set to NULL if a replacement is not desired. */
43
    MVMString *replacement;
44
45
    /* Currently stores only whether or not the decoder should decode strictly or
46
     * permissively. Set to 1 for permissive decoding; the default is strict. */
47
    MVMuint32 config;
48
};
49
50
/* A single bunch of bytes added to a decode stream, with a link to the next
51
 * one, if any. */
52
struct MVMDecodeStreamBytes {
53
    char                 *bytes;   /* The bytes in this bunch. */
54
    MVMint32              length;  /* Length of this bunch; presumably a byte count -- TODO confirm. */
55
    MVMDecodeStreamBytes *next;    /* The next bunch in the chain, if any. */
56
};
57
58
/* A bunch of characters already decoded, with a link to the next bunch. */
59
struct MVMDecodeStreamChars {
60
    MVMGrapheme32        *chars;   /* The decoded characters in this bunch. */
61
    MVMint32              length;  /* Length of this bunch; presumably a count of MVMGrapheme32 elements -- TODO confirm. */
62
    MVMDecodeStreamChars *next;    /* The next bunch in the chain. */
63
};
64
65
/* For situations where we need to decode up to some separators, this data
66
 * structure holds the information about them. */
67
struct MVMDecodeStreamSeparators {
68
    /* The lengths of the separators, in graphemes. */
69
    MVMint32 *sep_lengths;
70
71
    /* The graphemes themselves, for all separators, in a single array (use
72
     * sep_lengths to find out how many there are in each separator). */
73
    MVMGrapheme32 *sep_graphemes;
74
75
    /* The number of separators we have. */
76
    MVMint32 num_seps;
77
78
    /* Cached maximum separator length, to save regular recalculation. */
79
    MVMint32 max_sep_length;
80
81
    /* Cached final graphemes, for quick lookups in stream_maybe_sep. */
82
    MVMGrapheme32 *final_graphemes;
83
84
    /* Since separators are most often control chars, we can quickly filter
85
     * out many graphemes without a separator search by keeping around the
86
     * maximum codepoint/synthetic index of any final grapheme and doing a
87
     * quick comparison. */
88
    MVMGrapheme32 max_final_grapheme;
89
};
90
91
/* Checks if we may have encountered one of the separators. This just looks to
92
 * see if we hit the final grapheme of any of the separators, which is all we
93
 * demand the actual encodings themselves work out (multi-grapheme separators
94
 * are handled in the decode stream logic itself).
 *
 * Returns 1 if sep_spec is non-NULL and g equals one of the first num_seps
 * entries of sep_spec->final_graphemes; returns 0 otherwise, including when
 * sep_spec is NULL. The tc parameter is not used by this function. */
95
0
MVM_STATIC_INLINE MVMint32 MVM_string_decode_stream_maybe_sep(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMGrapheme32 g) {
96
0
    /* Quick filter: anything above the largest final grapheme cannot match,
     * so the search below is skipped for it. */
    if (sep_spec && g <= sep_spec->max_final_grapheme) {
97
0
        MVMint32 i;
98
0
        /* Linear search over the cached final grapheme of each separator. */
        for (i = 0; i < sep_spec->num_seps; i++)
99
0
            if (sep_spec->final_graphemes[i] == g)
100
0
                return 1;
101
0
    }
102
0
    return 0;
103
0
}
104
105
/* Decode stream functions. Only prototypes are given here. */
MVMDecodeStream * MVM_string_decodestream_create(MVMThreadContext *tc, MVMint32 encoding, MVMint64 abs_byte_pos, MVMint32 translate_newlines);
106
void MVM_string_decodestream_add_bytes(MVMThreadContext *tc, MVMDecodeStream *ds, char *bytes, MVMint32 length);
107
void MVM_string_decodestream_add_chars(MVMThreadContext *tc, MVMDecodeStream *ds, MVMGrapheme32 *chars, MVMint32 length);
108
void MVM_string_decodestream_discard_to(MVMThreadContext *tc, MVMDecodeStream *ds, const MVMDecodeStreamBytes *bytes, MVMint32 pos);
109
MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds, MVMint32 chars, MVMint64 eof);
110
MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds, MVMDecodeStreamSeparators *seps, MVMint32 chomp);
111
MVMString * MVM_string_decodestream_get_until_sep_eof(MVMThreadContext *tc, MVMDecodeStream *ds, MVMDecodeStreamSeparators *sep_spec, MVMint32 chomp);
112
MVMString * MVM_string_decodestream_get_all(MVMThreadContext *tc, MVMDecodeStream *ds);
113
MVMString * MVM_string_decodestream_get_available(MVMThreadContext *tc, MVMDecodeStream *ds);
114
MVMint64 MVM_string_decodestream_have_bytes(MVMThreadContext *tc, const MVMDecodeStream *ds, MVMint32 bytes);
115
MVMint64 MVM_string_decodestream_bytes_available(MVMThreadContext *tc, const MVMDecodeStream *ds);
116
MVMint64 MVM_string_decodestream_bytes_to_buf(MVMThreadContext *tc, MVMDecodeStream *ds, char **buf, MVMint32 bytes);
117
MVMint64 MVM_string_decodestream_tell_bytes(MVMThreadContext *tc, const MVMDecodeStream *ds);
118
MVMint32 MVM_string_decodestream_is_empty(MVMThreadContext *tc, MVMDecodeStream *ds);
119
void MVM_string_decodestream_destroy(MVMThreadContext *tc, MVMDecodeStream *ds);
120
/* Functions operating on an MVMDecodeStreamSeparators specification. */
void MVM_string_decode_stream_sep_default(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec);
121
void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMString **seps, MVMint32 num_seps);
122
void MVM_string_decode_stream_sep_destroy(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec);