Coverage Report

Created: 2017-04-15 07:07

/home/travis/build/MoarVM/MoarVM/src/strings/nfg.h
Line
Count
Source
1
/* State kept around for implementing Normal Form Grapheme. The design is such
2
 * that we can always do lookups without needing to acquire a lock. When we
3
 * do additions of new synthetics, we must acquire the lock before doing so,
4
 * and be sure to validate nothing changed. We also must do sufficient copying
5
 * to ensure that we never break another thread doing a read. Freeing of
6
 * memory is thus deferred to a global safe point, which means we never have
7
 * one thread reading memory freed by another. */
8
struct MVMNFGState {
9
    /* Table of information about synthetic graphemes. A synthetic S is a
10
     * negative value, so its entry is found at index (-S - 1). */
11
    MVMNFGSynthetic *synthetics;
12
13
    /* Trie used to look up the (NFG) grapheme for a sequence of codepoints
14
     * that is already in NFC. */
15
    MVMNFGTrieNode *grapheme_lookup;
16
17
    /* Mutex acquired when adding to the grapheme table; lookups do not need it. */
18
    uv_mutex_t update_mutex;
19
20
    /* Number of synthetics we currently have. */
21
    MVMint32 num_synthetics;
22
};
23
24
/* State held about a synthetic. */
25
struct MVMNFGSynthetic {
26
    /* The base (non-combining) codepoint of the grapheme. */
27
    MVMCodepoint base;
28
29
    /* The number of combiners held in combs. */
30
    MVMint32 num_combs;
31
32
    /* Array of combiner codepoints. */
33
    MVMCodepoint *combs;
34
35
    /* Cached case transforms, NULL if not calculated. NOTE(review): the uc/lc/tc/fc suffixes presumably mean upper/lower/title/fold case - confirm. */
36
    MVMGrapheme32 *case_uc;
37
    MVMGrapheme32 *case_lc;
38
    MVMGrapheme32 *case_tc;
39
    MVMGrapheme32 *case_fc;
40
41
    /* Grapheme counts of the corresponding cached case transforms. */
42
    MVMint32 case_uc_graphs;
43
    MVMint32 case_lc_graphs;
44
    MVMint32 case_tc_graphs;
45
    MVMint32 case_fc_graphs;
46
47
    /* Is this a UTF-8 C-8 synthetic? */
48
    MVMint32 is_utf8_c8;
49
};
50
51
/* A node in the NFG trie. */
52
struct MVMNFGTrieNode {
53
    /* Set of entries for further traversal, sorted ascending on codepoint
54
     * so we can find an entry using binary search. */
55
    MVMNFGTrieNodeEntry *next_codes;
56
57
    /* Number of entries in next_codes. */
58
    MVMint32 num_entries;
59
60
    /* Non-zero if we reach a result at this node; the result is then always
61
     * negative, since it's an NFG synthetic. */
62
    MVMGrapheme32 graph;
63
};
64
65
/* An entry in the list of next possible codepoints in the NFG trie. */
66
struct MVMNFGTrieNodeEntry {
67
    /* The codepoint this entry is matched on. */
68
    MVMCodepoint code;
69
70
    /* Trie node to traverse to if we find this entry's codepoint. */
71
    MVMNFGTrieNode *node;
72
};
73
74
/* The maximum number of codepoints we will allow in a synthetic grapheme.
75
 * This is a good bit higher than any real-world use case is going to run
76
 * into. */
77
179
#define MVM_GRAPHEME_MAX_CODEPOINTS 1024
78
79
/* Functions related to grapheme handling; only the prototypes are declared here. NOTE(review): from the signatures, codes/num_codes look like an array plus its length, and result like an out-parameter - confirm against the implementation. */
80
MVMGrapheme32 MVM_nfg_codes_to_grapheme(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes);
81
MVMGrapheme32 MVM_nfg_codes_to_grapheme_utf8_c8(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes);
82
MVMGrapheme32 MVM_nfg_crlf_grapheme(MVMThreadContext *tc);
83
MVMNFGSynthetic * MVM_nfg_get_synthetic_info(MVMThreadContext *tc, MVMGrapheme32 synth);
84
MVMuint32 MVM_nfg_get_case_change(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint32 case_, MVMGrapheme32 **result);
85
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b);
86
87
/* NFG subsystem cleanup. */
88
void MVM_nfg_destroy(MVMThreadContext *tc);