/home/travis/build/MoarVM/MoarVM/src/strings/decode_stream.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* Represents a bytes => chars decoding stream. Input bytes and decoded chars are each held as a chain of buffers linked through their next fields. */ |
2 | | struct MVMDecodeStream { |
3 | | /* Head and tail of the input byte buffers (MVMDecodeStreamBytes nodes). */ |
4 | | MVMDecodeStreamBytes *bytes_head; |
5 | | MVMDecodeStreamBytes *bytes_tail; |
6 | | |
7 | | /* Head and tail of the output char buffers (MVMDecodeStreamChars nodes). */ |
8 | | MVMDecodeStreamChars *chars_head; |
9 | | MVMDecodeStreamChars *chars_tail; |
10 | | |
11 | | /* The byte position (for tell). NOTE(review): presumably the value reported by MVM_string_decodestream_tell_bytes - confirm. */ |
12 | | MVMint64 abs_byte_pos; |
13 | | |
14 | | /* How far we've eaten into the current head bytes buffer (relative to that buffer, not to the whole stream). */ |
15 | | MVMint32 bytes_head_pos; |
16 | | |
17 | | /* How far we've eaten into the current head char buffer (relative to that buffer, not to the whole stream). */ |
18 | | MVMint32 chars_head_pos; |
19 | | |
20 | | /* The encoding we're using, as an integer identifier; the valid values are defined outside this header. */ |
21 | | MVMint32 encoding; |
22 | | |
23 | | /* Normalizer, embedded by value rather than referenced through a pointer. */ |
24 | | MVMNormalizer norm; |
25 | | |
26 | | /* Optional place for the decoder to keep any extra state it needs between |
27 | | * decode calls. Will be freed when the decode stream is destroyed. */ |
28 | | void *decoder_state; |
29 | | }; |
30 | | |
31 | | /* A single bunch of bytes added to a decode stream, with a link to the next |
32 | | * one, if any. Nodes of this type form the input chain of MVMDecodeStream. */ |
33 | | struct MVMDecodeStreamBytes { |
34 | | char *bytes; /* The byte data of this bunch. */ |
35 | | MVMint32 length; /* Presumably the number of bytes in the bytes array - TODO confirm. */ |
36 | | MVMDecodeStreamBytes *next; /* The following bunch, if any. */ |
37 | | }; |
38 | | |
39 | | /* A bunch of characters already decoded, with a link to the next bunch. Nodes of this type form the output chain of MVMDecodeStream. */ |
40 | | struct MVMDecodeStreamChars { |
41 | | MVMGrapheme32 *chars; /* The decoded characters, stored as MVMGrapheme32 values. */ |
42 | | MVMint32 length; /* Presumably the number of entries in the chars array - TODO confirm. */ |
43 | | MVMDecodeStreamChars *next; /* The following bunch, if any. */ |
44 | | }; |
45 | | |
46 | | /* For situations where we need to decode up to some separators, this data |
47 | | * structure holds the information about them, stored as two parallel flat arrays. */ |
48 | | struct MVMDecodeStreamSeparators { |
49 | | /* The lengths of the separators, in graphemes; one entry per separator. */ |
50 | | MVMint32 *sep_lengths; |
51 | | |
52 | | /* The graphemes themselves, concatenated in a single array (use sep_lengths to find |
53 | | * out how many there are in each separator; separator i ends at index sum(sep_lengths[0..i]) - 1). */ |
54 | | MVMGrapheme32 *sep_graphemes; |
55 | | |
56 | | /* The number of separators we have (the number of entries in sep_lengths). */ |
57 | | MVMint32 num_seps; |
58 | | }; |
59 | | |
60 | | /* Checks if we may have encountered one of the separators: returns 1 if g equals |
61 | | * the final grapheme of any non-empty separator in sep_spec, otherwise 0 (also 0 |
62 | | * when sep_spec is NULL). Only final graphemes are compared, which is all we demand |
63 | | * the encodings work out; multi-grapheme separators are handled in the decode stream logic. tc is unused here. */ |
64 | 0 | MVM_STATIC_INLINE MVMint32 MVM_string_decode_stream_maybe_sep(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMGrapheme32 g) { |
65 | 0 | if (sep_spec) { |
66 | 0 | MVMint32 cur_graph = -1; /* Index in sep_graphemes of the final grapheme of the separators walked so far. */ |
67 | 0 | MVMint32 i; |
68 | 0 | for (i = 0; i < sep_spec->num_seps; i++) { |
69 | 0 | cur_graph += sep_spec->sep_lengths[i]; |
70 | 0 | if (sep_spec->sep_lengths[i] > 0 && sep_spec->sep_graphemes[cur_graph] == g) /* Skip empty separators: they have no final grapheme, and a leading one would leave cur_graph at -1. */ |
71 | 0 | return 1; |
72 | 0 | } |
73 | 0 | } |
74 | 0 | return 0; |
75 | 0 | } |
76 | | /* Decode stream API; the definitions are not part of this header. NOTE(review): grouped by name only - create/destroy, add_bytes/add_chars (input), get_* (return MVMString results), have_bytes/bytes_available/bytes_to_buf/tell_bytes (byte-level queries), decode_stream_sep_* (separator spec helpers); confirm semantics against the implementation. */ |
77 | | MVMDecodeStream * MVM_string_decodestream_create(MVMThreadContext *tc, MVMint32 encoding, MVMint64 abs_byte_pos, MVMint32 translate_newlines); |
78 | | void MVM_string_decodestream_add_bytes(MVMThreadContext *tc, MVMDecodeStream *ds, char *bytes, MVMint32 length); |
79 | | void MVM_string_decodestream_add_chars(MVMThreadContext *tc, MVMDecodeStream *ds, MVMGrapheme32 *chars, MVMint32 length); |
80 | | void MVM_string_decodestream_discard_to(MVMThreadContext *tc, MVMDecodeStream *ds, const MVMDecodeStreamBytes *bytes, MVMint32 pos); |
81 | | MVMString * MVM_string_decodestream_get_chars(MVMThreadContext *tc, MVMDecodeStream *ds, MVMint32 chars); |
82 | | MVMString * MVM_string_decodestream_get_until_sep(MVMThreadContext *tc, MVMDecodeStream *ds, MVMDecodeStreamSeparators *seps, MVMint32 chomp); |
83 | | MVMString * MVM_string_decodestream_get_until_sep_eof(MVMThreadContext *tc, MVMDecodeStream *ds, MVMDecodeStreamSeparators *sep_spec, MVMint32 chomp); |
84 | | MVMString * MVM_string_decodestream_get_all(MVMThreadContext *tc, MVMDecodeStream *ds); |
85 | | MVMString * MVM_string_decodestream_get_available(MVMThreadContext *tc, MVMDecodeStream *ds); |
86 | | MVMint64 MVM_string_decodestream_have_bytes(MVMThreadContext *tc, const MVMDecodeStream *ds, MVMint32 bytes); |
87 | | MVMint64 MVM_string_decodestream_bytes_available(MVMThreadContext *tc, const MVMDecodeStream *ds); |
88 | | MVMint64 MVM_string_decodestream_bytes_to_buf(MVMThreadContext *tc, MVMDecodeStream *ds, char **buf, MVMint32 bytes); |
89 | | MVMint64 MVM_string_decodestream_tell_bytes(MVMThreadContext *tc, const MVMDecodeStream *ds); |
90 | | MVMint32 MVM_string_decodestream_is_empty(MVMThreadContext *tc, MVMDecodeStream *ds); |
91 | | void MVM_string_decodestream_destroy(MVMThreadContext *tc, MVMDecodeStream *ds); |
92 | | void MVM_string_decode_stream_sep_default(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec); |
93 | | void MVM_string_decode_stream_sep_from_strings(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec, MVMString **seps, MVMint32 num_seps); |
94 | | MVMint32 MVM_string_decode_stream_sep_max_chars(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec); |
95 | | void MVM_string_decode_stream_sep_destroy(MVMThreadContext *tc, MVMDecodeStreamSeparators *sep_spec); |