Coverage Report

Created: 2018-07-03 15:31

/home/travis/build/MoarVM/MoarVM/src/strings/nfg.h
Line
Count
Source
1
/* State kept around for implementing Normal Form Grapheme. The design is such
2
 * that we can always do lookups without needing to acquire a lock. When we
3
 * do additions of new synthetics, we must acquire the lock before doing so,
4
 * and be sure to validate nothing changed. We also must do sufficient copying
5
 * to ensure that we never break another thread doing a read. Memory to be
6
 * freed is thus released at a global safe point, which means we never have one
7
 * thread reading memory freed by another. */
8
struct MVMNFGState {
9
    /* Table of information about synthetic graphemes. Given some (negative)
10
     * synthetic S, we look up in this table with (-S - 1), i.e. -1 maps to 0. */
11
    MVMNFGSynthetic *synthetics;
12
13
    /* Trie used to do lookups by codepoints (already in NFC) to an (NFG)
14
     * grapheme. */
15
    MVMNFGTrieNode *grapheme_lookup;
16
17
    /* Mutex taken when adding new synthetics; lookups never acquire it. */
18
    uv_mutex_t update_mutex;
19
20
    /* Number of synthetics we have. */
21
    MVMint32 num_synthetics;
22
23
    /* Cached CRLF grapheme index, since we need it so often. */
24
    MVMGrapheme32 crlf_grapheme;
25
};
26
27
/* State held about a synthetic. */
28
struct MVMNFGSynthetic {
29
    /* The base (non-combining) grapheme, stored as an index. */
30
    /* The index of the base (non-combining) grapheme;
31
     * set to -1 if it does not exist. Presumably indexes codes; confirm. */
32
    MVMint32 base_index;
33
34
    /* The number of codepoints we have (presumably the length of codes). */
35
    MVMint32 num_codes;
36
37
    /* Array of codepoints. */
38
    MVMCodepoint *codes;
39
40
    /* Cached case transforms, NULL if not calculated. */
41
    MVMGrapheme32 *case_uc;
42
    MVMGrapheme32 *case_lc;
43
    MVMGrapheme32 *case_tc;
44
    MVMGrapheme32 *case_fc;
45
46
    /* Grapheme counts of the cached case transforms above, one per array. */
47
    MVMint32 case_uc_graphs;
48
    MVMint32 case_lc_graphs;
49
    MVMint32 case_tc_graphs;
50
    MVMint32 case_fc_graphs;
51
52
    /* Is this a UTF-8 C-8 synthetic? */
53
    MVMint32 is_utf8_c8;
54
};
55
56
/* A node in the NFG trie. */
57
struct MVMNFGTrieNode {
58
    /* Set of entries for further traversal, sorted ascending on codepoint
59
     * so we can find an entry using binary search. */
60
    MVMNFGTrieNodeEntry *next_codes;
61
62
    /* Number of entries in next_codes. */
63
    MVMint32 num_entries;
64
65
    /* Non-zero if we reach a result at this node (in which case it is always
66
     * negative, since it's an NFG synthetic). */
67
    MVMGrapheme32 graph;
68
};
69
70
/* An entry in the list of next possible codepoints in the NFG trie. */
71
struct MVMNFGTrieNodeEntry {
72
    /* The codepoint this entry matches. */
73
    MVMCodepoint code;
74
75
    /* Trie node to traverse to if we find this codepoint. */
76
    MVMNFGTrieNode *node;
77
};
78
79
/* The maximum number of codepoints we will allow in a synthetic grapheme.
80
 * This is a good bit higher than any real-world use case is going to run
81
 * into. */
82
373
#define MVM_GRAPHEME_MAX_CODEPOINTS 1024
83
84
/* Functions related to grapheme handling; all take the thread context first. */
85
MVMGrapheme32 MVM_nfg_codes_to_grapheme(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes);
86
MVMGrapheme32 MVM_nfg_codes_to_grapheme_utf8_c8(MVMThreadContext *tc, MVMCodepoint *codes, MVMint32 num_codes);
87
MVMGrapheme32 MVM_nfg_crlf_grapheme(MVMThreadContext *tc);
88
MVMNFGSynthetic * MVM_nfg_get_synthetic_info(MVMThreadContext *tc, MVMGrapheme32 synth);
89
MVMuint32 MVM_nfg_get_case_change(MVMThreadContext *tc, MVMGrapheme32 codepoint, MVMint32 case_, MVMGrapheme32 **result);
90
MVMint32 MVM_nfg_is_concat_stable(MVMThreadContext *tc, MVMString *a, MVMString *b);
91
92
/* NFG subsystem initialization and cleanup. */
93
void MVM_nfg_init(MVMThreadContext *tc);
94
void MVM_nfg_destroy(MVMThreadContext *tc);