Coverage Report

Created: 2017-04-15 07:07

/home/travis/build/MoarVM/MoarVM/src/strings/iter.h
Line
Count
Source (jump to first uncovered line)
1
/* Grapheme iterator structure; iterates through graphemes in a string. */
2
struct MVMGraphemeIter {
3
    /* The blob we're currently iterating over. */
4
    union {
5
        MVMGrapheme32    *blob_32;
6
        MVMGraphemeASCII *blob_ascii;
7
        MVMGrapheme8     *blob_8;
8
        void             *any;
9
    } active_blob;
10
11
    /* The type of blob we have. */
12
    MVMuint16 blob_type;
13
14
    /* The number of strands remaining, if any. */
15
    MVMuint16 strands_remaining;
16
17
    /* The current position, and the end position. */
18
    MVMStringIndex pos;
19
    MVMStringIndex end;
20
21
    /* Repetition count, and the start index in the blob (only needed if we're
22
     * doing an iteration over a repetition). */
23
    MVMStringIndex start;
24
    MVMuint32      repetitions;
25
26
    /* The next strand, if we're doing a strand-based iteration. */
27
    MVMStringStrand *next_strand;
28
};
29
30
/* Initializes a grapheme iterator. */
31
0
MVM_STATIC_INLINE void MVM_string_gi_init(MVMThreadContext *tc, MVMGraphemeIter *gi, MVMString *s) {
32
0
    if (s->body.storage_type == MVM_STRING_STRAND) {
33
0
        MVMStringStrand *strands = s->body.storage.strands;
34
0
        MVMString       *first   = strands[0].blob_string;
35
0
        gi->active_blob.any      = first->body.storage.any;
36
0
        gi->blob_type            = first->body.storage_type;
37
0
        gi->strands_remaining    = s->body.num_strands - 1;
38
0
        gi->pos                  = strands[0].start;
39
0
        gi->end                  = strands[0].end;
40
0
        gi->start                = strands[0].start;
41
0
        gi->repetitions          = strands[0].repetitions;
42
0
        gi->next_strand          = strands + 1;
43
0
    }
44
0
    else {
45
0
        gi->active_blob.any   = s->body.storage.any;
46
0
        gi->blob_type         = s->body.storage_type;
47
0
        gi->strands_remaining = 0;
48
0
        gi->pos               = 0;
49
0
        gi->end               = s->body.num_graphs;
50
0
        gi->repetitions       = 0;
51
0
    }
52
0
};
53
54
/* Sets the position of the iterator. (Can be optimized in many ways in the
55
 * repetitions and strands branches.) */
56
0
MVM_STATIC_INLINE void MVM_string_gi_move_to(MVMThreadContext *tc, MVMGraphemeIter *gi, MVMuint32 pos) {
57
0
    MVMuint32 remaining = pos;
58
0
    MVMuint32 strand_graphs;
59
0
60
0
    /* Find the appropriate strand. */
61
0
    while (remaining > (strand_graphs = (gi->end - gi->pos) * (gi->repetitions + 1))) {
62
0
        MVMStringStrand *next = gi->next_strand;
63
0
        if (!gi->strands_remaining)
64
0
            MVM_exception_throw_adhoc(tc, "Iteration past end of grapheme iterator");
65
0
        gi->active_blob.any = next->blob_string->body.storage.any;
66
0
        gi->blob_type       = next->blob_string->body.storage_type;
67
0
        gi->pos             = next->start;
68
0
        gi->end             = next->end;
69
0
        gi->start           = next->start;
70
0
        gi->repetitions     = next->repetitions;
71
0
        gi->strands_remaining--;
72
0
        gi->next_strand++;
73
0
        remaining -= strand_graphs;
74
0
    }
75
0
76
0
    /* Now look within the strand. */
77
0
    while (1) {
78
0
        if (remaining == 0) {
79
0
            return;
80
0
        }
81
0
        if (gi->pos < gi->end) {
82
0
            if (gi->pos + remaining <= gi->end) {
83
0
                gi->pos += remaining;
84
0
                return;
85
0
            }
86
0
            remaining -= gi->end - gi->pos;
87
0
            gi->pos = gi->end;
88
0
        }
89
0
        else if (gi->repetitions) {
90
0
            MVMuint32 rep_graphs     = gi->end - gi->start;
91
0
            MVMuint32 remaining_reps = remaining / rep_graphs;
92
0
            if (remaining_reps > gi->repetitions)
93
0
                remaining_reps = gi->repetitions;
94
0
            gi->repetitions -= remaining_reps;
95
0
            remaining       -= remaining_reps * rep_graphs;
96
0
            if (gi->repetitions) {
97
0
                gi->pos = gi->start;
98
0
                gi->repetitions--; /* Next read will be reading *this* repetition. */
99
0
            }
100
0
        }
101
0
        else {
102
0
            MVM_exception_throw_adhoc(tc, "Iteration past end of grapheme iterator");
103
0
        }
104
0
    }
105
0
}
106
107
/* Checks if there is more to read from a grapheme iterator. */
108
0
MVM_STATIC_INLINE MVMint32 MVM_string_gi_has_more(MVMThreadContext *tc, MVMGraphemeIter *gi) {
109
0
    return gi->pos < gi->end || gi->repetitions || gi->strands_remaining;
110
0
}
111
112
/* Gets the next grapheme. */
113
0
MVM_STATIC_INLINE MVMGrapheme32 MVM_string_gi_get_grapheme(MVMThreadContext *tc, MVMGraphemeIter *gi) {
114
0
    while (1) {
115
0
        if (gi->pos < gi->end) {
116
0
            switch (gi->blob_type) {
117
0
            case MVM_STRING_GRAPHEME_32:
118
0
                return gi->active_blob.blob_32[gi->pos++];
119
0
            case MVM_STRING_GRAPHEME_ASCII:
120
0
                return gi->active_blob.blob_ascii[gi->pos++];
121
0
            case MVM_STRING_GRAPHEME_8:
122
0
                return gi->active_blob.blob_8[gi->pos++];
123
0
            }
124
0
        }
125
0
        else if (gi->repetitions) {
126
0
            gi->pos = gi->start;
127
0
            gi->repetitions--;
128
0
        }
129
0
        else if (gi->strands_remaining) {
130
0
            MVMStringStrand *next = gi->next_strand;
131
0
            gi->active_blob.any = next->blob_string->body.storage.any;
132
0
            gi->blob_type       = next->blob_string->body.storage_type;
133
0
            gi->pos             = next->start;
134
0
            gi->end             = next->end;
135
0
            gi->start           = next->start;
136
0
            gi->repetitions     = next->repetitions;
137
0
            gi->strands_remaining--;
138
0
            gi->next_strand++;
139
0
        }
140
0
        else {
141
0
            MVM_exception_throw_adhoc(tc, "Iteration past end of grapheme iterator");
142
0
        }
143
0
    }
144
0
}
145
146
/* Code point iterator. Uses the grapheme iterator, and adds some extra bits
147
 * in order to iterate the code points in synthetics. */
148
struct MVMCodepointIter {
149
    /* The grapheme iterator. */
150
    MVMGraphemeIter gi;
151
152
    /* The codes of the current synthetic we're walking through, if any, with
153
     * the number of combiners we returned so far, and the total number of
154
     * combiners there are. */
155
    MVMCodepoint  *synth_codes;
156
    MVMint32       visited_synth_codes;
157
    MVMint32       total_synth_codes;
158
159
    /* If we should translate newline \n into \r\n. */
160
    MVMint32       translate_newlines;
161
};
162
163
/* Initializes a code point iterator. */
164
MVM_STATIC_INLINE void MVM_string_ci_init(MVMThreadContext *tc, MVMCodepointIter *ci, MVMString *s,
165
0
        MVMint32 translate_newlines) {
166
0
    /* Initialize our underlying grapheme iterator. */
167
0
    MVM_string_gi_init(tc, &(ci->gi), s);
168
0
169
0
    /* We've no currently active synthetic codepoint (and other fields are
170
0
     * unused until we do, so leave them alone for now). */
171
0
    ci->synth_codes = NULL;
172
0
    ci->translate_newlines = translate_newlines;
173
0
};
174
175
/* Checks if there is more to read from a code point iterator; this is the
176
 * case if we're still walking through a synthetic or we have more things
177
 * available from the underlying grapheme iterator. */
178
0
MVM_STATIC_INLINE MVMint32 MVM_string_ci_has_more(MVMThreadContext *tc, MVMCodepointIter *ci) {
179
0
    return ci->synth_codes || MVM_string_gi_has_more(tc, &(ci->gi));
180
0
}
181
182
/* Gets the next code point. */
183
0
MVM_STATIC_INLINE MVMCodepoint MVM_string_ci_get_codepoint(MVMThreadContext *tc, MVMCodepointIter *ci) {
184
0
    MVMCodepoint result;
185
0
186
0
    /* Do we have combiners from a synthetic to return? */
187
0
    if (ci->synth_codes) {
188
0
        /* Take the current combiner as the result. */
189
0
        result = ci->synth_codes[ci->visited_synth_codes];
190
0
191
0
        /* If we've seen all of the synthetics, clear up so we'll take another
192
0
         * grapheme next time around. */
193
0
        ci->visited_synth_codes++;
194
0
        if (ci->visited_synth_codes == ci->total_synth_codes)
195
0
            ci->synth_codes = NULL;
196
0
    }
197
0
198
0
    /* Otherwise, proceed to the next grapheme. */
199
0
    else {
200
0
        MVMGrapheme32 g = MVM_string_gi_get_grapheme(tc, &(ci->gi));
201
0
        if (ci->translate_newlines && g == '\n')
202
0
            g = MVM_nfg_crlf_grapheme(tc);
203
0
        if (g >= 0) {
204
0
            /* It's not a synthetic, so we're done. */
205
0
            result = (MVMCodepoint)g;
206
0
        }
207
0
        else {
208
0
            /* It's a synthetic. Look it up. */
209
0
            MVMNFGSynthetic *synth = MVM_nfg_get_synthetic_info(tc, g);
210
0
211
0
            /* Set up the iterator so in the next iteration we will start to
212
0
            * hand back combiners. */
213
0
            ci->synth_codes         = synth->combs;
214
0
            ci->visited_synth_codes = 0;
215
0
            ci->total_synth_codes   = synth->num_combs;
216
0
217
0
            /* Result is the base character of the grapheme. */
218
0
            result = synth->base;
219
0
        }
220
0
    }
221
0
222
0
    return result;
223
0
}