/home/travis/build/MoarVM/MoarVM/src/6model/reprs/MVMString.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* Representation used by VM-level strings. |
2 | | * |
3 | | * Strings come in one of 3 forms today, with 1 expected future form: |
4 | | * - 32-bit buffer of graphemes (Unicode codepoints or synthetic codepoints) |
5 | | * - 8-bit buffer of codepoints that all fall in the ASCII range |
6 | | * - Buffer of strands |
7 | | * - (LATER) 8-bit buffer of codepoints with negatives as synthetics (we |
8 | | * draw out a distinction with the ASCII range buffer because we can do |
9 | | * some I/O simplifications when we know all is in the ASCII range). |
10 | | * |
11 | | * A buffer of strands represents a string made up of other non-strand |
12 | | * strings. That is, there are no recursive strands. This simplifies the |
13 | | * process of iteration enormously. A strand may refer to just part of |
14 | | * another string by specifying offsets. Furthermore, it may specify a |
15 | | * repetition count. |
16 | | */ |
17 | | |
18 | | /* Kinds of grapheme we may hold in a string. Each typedef is the element type of one of the buffers in MVMStringBody.storage. */ |
19 | | typedef MVMint32 MVMGrapheme32; /* Element of the 32-bit grapheme buffer (blob_32): a Unicode codepoint or a synthetic codepoint. */ |
20 | | typedef MVMint8 MVMGraphemeASCII; /* Element of the 8-bit buffer (blob_ascii) whose codepoints all fall in the ASCII range. */ |
21 | | typedef MVMint8 MVMGrapheme8; /* Future use: element of the 8-bit buffer (blob_8) that would use negatives as synthetics. */ |
22 | | |
23 | | /* What kind of data is a string storing? Presumably these are the values held in MVMStringBody.storage_type -- confirm against users. */ |
24 | 0 | #define MVM_STRING_GRAPHEME_32 0 /* 32-bit buffer of graphemes. */ |
25 | 0 | #define MVM_STRING_GRAPHEME_ASCII 1 /* 8-bit buffer of codepoints that all fall in the ASCII range. */ |
26 | 0 | #define MVM_STRING_GRAPHEME_8 2 /* 8-bit buffer with negatives as synthetics (future use). */ |
27 | 0 | #define MVM_STRING_STRAND 3 |
28 | | |
29 | | /* String index data type, for when we talk about indexes. In this file it types the start/end fields of MVMStringStrand. */ |
30 | | typedef MVMuint32 MVMStringIndex; |
31 | | |
32 | | /* Data type for a Unicode codepoint. Shares its underlying type (MVMint32) with MVMGrapheme32. */ |
33 | | typedef MVMint32 MVMCodepoint; |
34 | | |
35 | | /* Maximum number of strands we will have. NOTE(review): where this limit is enforced is not visible in this header. */ |
36 | | #define MVM_STRING_MAX_STRANDS 64 |
37 | | |
38 | | /* The body of a string: a pointer to the storage (one of several buffer forms), plus bookkeeping fields. */ |
39 | | struct MVMStringBody { |
40 | | union { /* All members are pointers sharing the same storage; only one form is meaningful at a time. */ |
41 | | MVMGrapheme32 *blob_32; /* 32-bit buffer of graphemes. */ |
42 | | MVMGraphemeASCII *blob_ascii; /* 8-bit buffer of ASCII-range codepoints. */ |
43 | | MVMGrapheme8 *blob_8; /* 8-bit grapheme buffer (future use). */ |
44 | | MVMStringStrand *strands; /* Buffer of strands. */ |
45 | | void *any; /* Untyped view of the same pointer. */ |
46 | | } storage; |
47 | | MVMuint16 storage_type; /* Presumably one of the MVM_STRING_* kinds, selecting the live union member -- TODO confirm. */ |
48 | | MVMuint16 num_strands; /* Presumably the number of entries in storage.strands for strand strings -- TODO confirm. */ |
49 | | MVMuint32 num_graphs; /* Presumably the string's length in graphemes -- TODO confirm. */ |
50 | | MVMhashv cached_hash_code; /* Cached hash code; NOTE(review): the "not yet computed" value is not visible here. */ |
51 | | }; |
52 | | |
53 | | /* A strand of a string: a reference to (part of) another, non-strand string, optionally repeated. */ |
54 | | struct MVMStringStrand { |
55 | | /* Another string that must be some kind of grapheme string (i.e. not itself made of strands). */ |
56 | | MVMString *blob_string; |
57 | | |
58 | | /* Start and end indexes we refer to in the blob string. NOTE(review): whether end is inclusive or exclusive is not shown here. */ |
59 | | MVMStringIndex start; |
60 | | MVMStringIndex end; |
61 | | |
62 | | /* Number of repetitions. NOTE(review): whether this counts total or additional occurrences is not shown here -- confirm. */ |
63 | | MVMuint32 repetitions; |
64 | | }; |
65 | | |
66 | | /* The MVMString, with header and body. */ |
67 | | struct MVMString { |
68 | | MVMObject common; /* The object header. */ |
69 | | MVMStringBody body; /* The string-specific body (storage pointer and bookkeeping). */ |
70 | | }; |
71 | | |
72 | | /* Function for REPR setup. Takes the thread context tc and returns a pointer to a const MVMREPROps (presumably the operations table for this representation -- confirm in the .c file). */ |
73 | | const MVMREPROps * MVMString_initialize(MVMThreadContext *tc); |